run,step,metric,value,stderr FineVision,1000,ai2d_exact_match,0.2548575129533679,0.007843322436924496 FineVision,1000,average,0.27120689295763617, FineVision,1000,average_rank,2.8, FineVision,1000,chartqa_relaxed_overall,0.3308,0.009411906161401973 FineVision,1000,docvqa_val_anls,0.3528553494243383,0.005852289239342309 FineVision,1000,infovqa_val_anls,0.17320578642581314,0.006297063452679795 FineVision,1000,mme_total_score,977.4280712284914, FineVision,1000,mmmu_val_mmmu_acc,0.25222, FineVision,1000,mmstar_average,0.23215874078908072, FineVision,1000,ocrbench_ocrbench_accuracy,0.286, FineVision,1000,seedbench_seed_all,0.2563646470261256, FineVision,1000,textvqa_val_exact_match,0.3024,0.00628900296642181 FineVision,2000,ai2d_exact_match,0.26295336787564766,0.007923526907377255 FineVision,2000,average,0.3202068275596269, FineVision,2000,average_rank,2.6, FineVision,2000,chartqa_relaxed_overall,0.4688,0.009982508912777261 FineVision,2000,docvqa_val_anls,0.4452261510942785,0.00614755494712251 FineVision,2000,infovqa_val_anls,0.1820547866557169,0.006217861455795791 FineVision,2000,mme_total_score,1049.3036214485794, FineVision,2000,mmmu_val_mmmu_acc,0.24556, FineVision,2000,mmstar_average,0.21305462434540698, FineVision,2000,ocrbench_ocrbench_accuracy,0.395, FineVision,2000,seedbench_seed_all,0.258532518065592, FineVision,2000,textvqa_val_exact_match,0.41068000000000005,0.006697862330024289 FineVision,3000,ai2d_exact_match,0.25226683937823835,0.007816909588794397 FineVision,3000,average,0.3507423834414229, FineVision,3000,average_rank,2.6, FineVision,3000,chartqa_relaxed_overall,0.5028,0.010001843767601082 FineVision,3000,docvqa_val_anls,0.502653993831009,0.006267072346683124 FineVision,3000,infovqa_val_anls,0.21728617578189535,0.006796941784959762 FineVision,3000,mme_total_score,1170.2383953581434, FineVision,3000,mmmu_val_mmmu_acc,0.27556, FineVision,3000,mmstar_average,0.25432376938577683, FineVision,3000,ocrbench_ocrbench_accuracy,0.436, FineVision,3000,seedbench_seed_all,0.2792106725958866, FineVision,3000,textvqa_val_exact_match,0.43658,0.006766885462882726 FineVision,4000,ai2d_exact_match,0.2645725388601036,0.007939149662089447 FineVision,4000,average,0.36961781722974835, FineVision,4000,average_rank,2.7, FineVision,4000,chartqa_relaxed_overall,0.5312,0.009982508912777261 FineVision,4000,docvqa_val_anls,0.5374434618615119,0.0062905728113059655 FineVision,4000,infovqa_val_anls,0.2287924838861707,0.006994568698639919 FineVision,4000,mme_total_score,1155.203781512605, FineVision,4000,mmmu_val_mmmu_acc,0.25556, FineVision,4000,mmstar_average,0.2575590188757354, FineVision,4000,ocrbench_ocrbench_accuracy,0.453, FineVision,4000,seedbench_seed_all,0.33913285158421347, FineVision,4000,textvqa_val_exact_match,0.4593,0.006791695475025738 FineVision,5000,ai2d_exact_match,0.3125,0.008342439145556371 FineVision,5000,average,0.3974627910380972, FineVision,5000,average_rank,2.6, FineVision,5000,chartqa_relaxed_overall,0.5488,0.00995424828018316 FineVision,5000,docvqa_val_anls,0.552360266782429,0.006300308519952055 FineVision,5000,infovqa_val_anls,0.23425555286643698,0.007002254622066442 FineVision,5000,mme_total_score,1181.4653861544618, FineVision,5000,mmmu_val_mmmu_acc,0.26667, FineVision,5000,mmstar_average,0.29596648146165705, FineVision,5000,ocrbench_ocrbench_accuracy,0.462, FineVision,5000,seedbench_seed_all,0.43107281823235133, FineVision,5000,textvqa_val_exact_match,0.47354000000000007,0.0068172185364497985 FineVision,6000,ai2d_exact_match,0.358160621761658,0.008629463221867162 FineVision,6000,average,0.4161227404571003, FineVision,6000,average_rank,2.1, FineVision,6000,chartqa_relaxed_overall,0.5628,0.00992279440175477 FineVision,6000,docvqa_val_anls,0.5747451497228876,0.00625495440870239 FineVision,6000,infovqa_val_anls,0.22152017368968838,0.006604546680525351 FineVision,6000,mme_total_score,1284.1648659463785, FineVision,6000,mmmu_val_mmmu_acc,0.27111, FineVision,6000,mmstar_average,0.2978489412854164, FineVision,6000,ocrbench_ocrbench_accuracy,0.495, FineVision,6000,seedbench_seed_all,0.4795997776542524, FineVision,6000,textvqa_val_exact_match,0.48432,0.006800535050670284 FineVision,7000,ai2d_exact_match,0.3707901554404145,0.00869347755587734 FineVision,7000,average,0.4291083177345374, FineVision,7000,average_rank,2.4, FineVision,7000,chartqa_relaxed_overall,0.5656,0.009915542506251351 FineVision,7000,docvqa_val_anls,0.5940907049431567,0.006224236305767187 FineVision,7000,infovqa_val_anls,0.2515675215816963,0.007105097396092786 FineVision,7000,mme_total_score,1185.875650260104, FineVision,7000,mmmu_val_mmmu_acc,0.26556, FineVision,7000,mmstar_average,0.31372400960777047, FineVision,7000,ocrbench_ocrbench_accuracy,0.504, FineVision,7000,seedbench_seed_all,0.4964424680377988, FineVision,7000,textvqa_val_exact_match,0.5002,0.006794794025220267 FineVision,8000,ai2d_exact_match,0.37759067357512954,0.008725299846043883 FineVision,8000,average,0.43846759477995995, FineVision,8000,average_rank,2.2, FineVision,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773 FineVision,8000,docvqa_val_anls,0.6017336419437208,0.006231612198089698 FineVision,8000,infovqa_val_anls,0.2449256624147254,0.006992518502948913 FineVision,8000,mme_total_score,1199.2409963985594, FineVision,8000,mmmu_val_mmmu_acc,0.28111, FineVision,8000,mmstar_average,0.33512257186205047, FineVision,8000,ocrbench_ocrbench_accuracy,0.51, FineVision,8000,seedbench_seed_all,0.5024458032240133, FineVision,8000,textvqa_val_exact_match,0.51008,0.006796301690135059 FineVision,9000,ai2d_exact_match,0.4067357512953368,0.008841214921078996 FineVision,9000,average,0.4422510732201056, FineVision,9000,average_rank,2.0, FineVision,9000,chartqa_relaxed_overall,0.5912,0.009834211136815875 FineVision,9000,docvqa_val_anls,0.6170968481662739,0.00617235763542544 FineVision,9000,infovqa_val_anls,0.23537031288570615,0.00670318154156447 FineVision,9000,mme_total_score,1231.5195078031213, FineVision,9000,mmmu_val_mmmu_acc,0.25889, FineVision,9000,mmstar_average,0.3216444898242951, FineVision,9000,ocrbench_ocrbench_accuracy,0.515, FineVision,9000,seedbench_seed_all,0.5120622568093385, FineVision,9000,textvqa_val_exact_match,0.52226,0.006792711289708482 FineVision,10000,ai2d_exact_match,0.39993523316062174,0.008817096257082848 FineVision,10000,average,0.4523875703250908, FineVision,10000,average_rank,1.7, FineVision,10000,chartqa_relaxed_overall,0.5996,0.00980154906867574 FineVision,10000,docvqa_val_anls,0.6262613496433054,0.006147756371688175 FineVision,10000,infovqa_val_anls,0.263290074230132,0.007186788766942786 FineVision,10000,mme_total_score,1240.8218287314926, FineVision,10000,mmmu_val_mmmu_acc,0.28778, FineVision,10000,mmstar_average,0.32972717906018517, FineVision,10000,ocrbench_ocrbench_accuracy,0.517, FineVision,10000,seedbench_seed_all,0.5217342968315731, FineVision,10000,textvqa_val_exact_match,0.5261600000000001,0.006785774843600811 FineVision,11000,ai2d_exact_match,0.422279792746114,0.008889771831066474 FineVision,11000,average,0.4561398159525099, FineVision,11000,average_rank,1.7, FineVision,11000,chartqa_relaxed_overall,0.6104,0.009755142291143075 FineVision,11000,docvqa_val_anls,0.6373130149166712,0.006128022584995044 FineVision,11000,infovqa_val_anls,0.24419378339723755,0.006897644885887063 FineVision,11000,mme_total_score,1322.9488795518205, FineVision,11000,mmmu_val_mmmu_acc,0.27778, FineVision,11000,mmstar_average,0.3298563439522548, FineVision,11000,ocrbench_ocrbench_accuracy,0.521, FineVision,11000,seedbench_seed_all,0.5237354085603113, FineVision,11000,textvqa_val_exact_match,0.5387,0.006770851562852138 FineVision,12000,ai2d_exact_match,0.42001295336787564,0.008883255931688034 FineVision,12000,average,0.4582751140055433, FineVision,12000,average_rank,1.6, FineVision,12000,chartqa_relaxed_overall,0.618,0.009719474639861454 FineVision,12000,docvqa_val_anls,0.6393961983751871,0.0061228747388476674 FineVision,12000,infovqa_val_anls,0.24798874058574302,0.006855374548993139 FineVision,12000,mme_total_score,1225.6453581432572, FineVision,12000,mmmu_val_mmmu_acc,0.27889, FineVision,12000,mmstar_average,0.34010867846816534, FineVision,12000,ocrbench_ocrbench_accuracy,0.512, FineVision,12000,seedbench_seed_all,0.5350194552529183, FineVision,12000,textvqa_val_exact_match,0.5330600000000001,0.006777713092109446 FineVision,13000,ai2d_exact_match,0.4375,0.008928571428571428 FineVision,13000,average,0.4692868662590049, FineVision,13000,average_rank,1.5, FineVision,13000,chartqa_relaxed_overall,0.6148,0.00973479791861169 FineVision,13000,docvqa_val_anls,0.6511374872549951,0.006086953065248391 FineVision,13000,infovqa_val_anls,0.24465055100441893,0.006808432538374664 FineVision,13000,mme_total_score,1281.7122849139657, FineVision,13000,mmmu_val_mmmu_acc,0.28222, FineVision,13000,mmstar_average,0.3453069542917521, FineVision,13000,ocrbench_ocrbench_accuracy,0.549, FineVision,13000,seedbench_seed_all,0.5442468037798777, FineVision,13000,textvqa_val_exact_match,0.55472,0.0067416788982325 FineVision,14000,ai2d_exact_match,0.4572538860103627,0.00896620675297095 FineVision,14000,average,0.47352486841689195, FineVision,14000,average_rank,1.4, FineVision,14000,chartqa_relaxed_overall,0.6172,0.009723347231923635 FineVision,14000,docvqa_val_anls,0.6502269393708169,0.006057950730638126 FineVision,14000,infovqa_val_anls,0.25805460837190913,0.007037735231659539 FineVision,14000,mme_total_score,1309.1444577831132, FineVision,14000,mmmu_val_mmmu_acc,0.28111, FineVision,14000,mmstar_average,0.34575818188776586, FineVision,14000,ocrbench_ocrbench_accuracy,0.551, FineVision,14000,seedbench_seed_all,0.5483602001111729, FineVision,14000,textvqa_val_exact_match,0.55276,0.006751206724612103 FineVision,15000,ai2d_exact_match,0.45045336787564766,0.008954861634252399 FineVision,15000,average,0.47878665012878824, FineVision,15000,average_rank,1.3, FineVision,15000,chartqa_relaxed_overall,0.612,0.009747841205275417 FineVision,15000,docvqa_val_anls,0.6621413031955148,0.006056838050222495 FineVision,15000,infovqa_val_anls,0.2706898598157733,0.007200315730154543 FineVision,15000,mme_total_score,1384.2171868747498, FineVision,15000,mmmu_val_mmmu_acc,0.30222, FineVision,15000,mmstar_average,0.35408135695920684, FineVision,15000,ocrbench_ocrbench_accuracy,0.558, FineVision,15000,seedbench_seed_all,0.5411339633129516, FineVision,15000,textvqa_val_exact_match,0.5583600000000001,0.0067279027203879065 FineVision,16000,ai2d_exact_match,0.45077720207253885,0.008955440137395838 FineVision,16000,average,0.47665128022935843, FineVision,16000,average_rank,1.5, FineVision,16000,chartqa_relaxed_overall,0.632,0.00964715642305132 FineVision,16000,docvqa_val_anls,0.6709415729142987,0.005999818105621502 FineVision,16000,infovqa_val_anls,0.26050032542402035,0.006997451875879188 FineVision,16000,mme_total_score,1317.8491396558625, FineVision,16000,mmmu_val_mmmu_acc,0.27556, FineVision,16000,mmstar_average,0.33214333327093315, FineVision,16000,ocrbench_ocrbench_accuracy,0.56, FineVision,16000,seedbench_seed_all,0.5463590883824346, FineVision,16000,textvqa_val_exact_match,0.56158,0.006723854754867398 FineVision,17000,ai2d_exact_match,0.45919689119170987,0.008969138793675545 FineVision,17000,average,0.4777141780162423, FineVision,17000,average_rank,1.3, FineVision,17000,chartqa_relaxed_overall,0.632,0.00964715642305132 FineVision,17000,docvqa_val_anls,0.6796338519136422,0.005948761388267941 FineVision,17000,infovqa_val_anls,0.28070956072505215,0.007298333094144192 FineVision,17000,mme_total_score,1381.9161664665867, FineVision,17000,mmmu_val_mmmu_acc,0.27667, FineVision,17000,mmstar_average,0.3370289492329521, FineVision,17000,ocrbench_ocrbench_accuracy,0.519, FineVision,17000,seedbench_seed_all,0.5510283490828238, FineVision,17000,textvqa_val_exact_match,0.56416,0.006724830373229479 FineVision,18000,ai2d_exact_match,0.46567357512953367,0.008977921602780726 FineVision,18000,average,0.4819834595278701, FineVision,18000,average_rank,1.2, FineVision,18000,chartqa_relaxed_overall,0.6376,0.009615793331418735 FineVision,18000,docvqa_val_anls,0.6775884603912571,0.005972234236435759 FineVision,18000,infovqa_val_anls,0.27154318420389256,0.007164903131667027 FineVision,18000,mme_total_score,1336.922769107643, FineVision,18000,mmmu_val_mmmu_acc,0.28667, FineVision,18000,mmstar_average,0.34482796716566916, FineVision,18000,ocrbench_ocrbench_accuracy,0.533, FineVision,18000,seedbench_seed_all,0.5543079488604781, FineVision,18000,textvqa_val_exact_match,0.5666399999999999,0.006713392287599574 FineVision,19000,ai2d_exact_match,0.4682642487046632,0.008981008686994101 FineVision,19000,average,0.4899006713916878, FineVision,19000,average_rank,1.2, FineVision,19000,chartqa_relaxed_overall,0.6444,0.009575809858898698 FineVision,19000,docvqa_val_anls,0.678226526479947,0.005970619221588814 FineVision,19000,infovqa_val_anls,0.26993847247278,0.0071348470764911525 FineVision,19000,mme_total_score,1406.6628651460583, FineVision,19000,mmmu_val_mmmu_acc,0.28333, FineVision,19000,mmstar_average,0.356220913822775, FineVision,19000,ocrbench_ocrbench_accuracy,0.577, FineVision,19000,seedbench_seed_all,0.554585881045025, FineVision,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905 FineVision,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662 FineVision,20000,average,0.4873169067639118, FineVision,20000,average_rank,1.2, FineVision,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618 FineVision,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767 FineVision,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538 FineVision,20000,mme_total_score,1324.6738695478193, FineVision,20000,mmmu_val_mmmu_acc,0.30111, FineVision,20000,mmstar_average,0.33806766134497995, FineVision,20000,ocrbench_ocrbench_accuracy,0.555, FineVision,20000,seedbench_seed_all,0.5587548638132296, FineVision,20000,textvqa_val_exact_match,0.56852,0.006720151338087659 Cauldron,1000,ai2d_exact_match,0.28886010362694303,0.008157423105367313 Cauldron,1000,average,0.29904301214549334, Cauldron,1000,average_rank,1.9, Cauldron,1000,chartqa_relaxed_overall,0.1936,0.007903961351247664 Cauldron,1000,docvqa_val_anls,0.32153744261519257,0.005317068996930092 Cauldron,1000,infovqa_val_anls,0.1431990055083018,0.005424936025458022 Cauldron,1000,mme_total_score,1172.0779311724689, Cauldron,1000,mmmu_val_mmmu_acc,0.27667, Cauldron,1000,mmstar_average,0.2911329978035828, Cauldron,1000,ocrbench_ocrbench_accuracy,0.337, Cauldron,1000,seedbench_seed_all,0.39360755975541967, Cauldron,1000,textvqa_val_exact_match,0.44578,0.0067711747933144 Cauldron,2000,ai2d_exact_match,0.41871761658031087,0.008879446246519871 Cauldron,2000,average,0.34894207663644056, Cauldron,2000,average_rank,1.9, Cauldron,2000,chartqa_relaxed_overall,0.2056,0.00808440468059435 Cauldron,2000,docvqa_val_anls,0.37496112947656884,0.005489559822643159 Cauldron,2000,infovqa_val_anls,0.14667060624395192,0.005473110880489631 Cauldron,2000,mme_total_score,1248.6002400960383, Cauldron,2000,mmmu_val_mmmu_acc,0.28667, Cauldron,2000,mmstar_average,0.34478967650439835, Cauldron,2000,ocrbench_ocrbench_accuracy,0.368, Cauldron,2000,seedbench_seed_all,0.5013896609227348, Cauldron,2000,textvqa_val_exact_match,0.49368,0.0068081481840761415 Cauldron,3000,ai2d_exact_match,0.4653497409326425,0.00897751861457722 Cauldron,3000,average,0.3647655686453986, Cauldron,3000,average_rank,2.4, Cauldron,3000,chartqa_relaxed_overall,0.2192,0.008275744025504309 Cauldron,3000,docvqa_val_anls,0.3999560247980121,0.005545460541574292 Cauldron,3000,infovqa_val_anls,0.15452276899525894,0.005625373377223539 Cauldron,3000,mme_total_score,1164.4316726690677, Cauldron,3000,mmmu_val_mmmu_acc,0.27667, Cauldron,3000,mmstar_average,0.34444117730168444, Cauldron,3000,ocrbench_ocrbench_accuracy,0.403, Cauldron,3000,seedbench_seed_all,0.5147304057809894, Cauldron,3000,textvqa_val_exact_match,0.50502,0.006802809387533405 Cauldron,4000,ai2d_exact_match,0.48121761658031087,0.008992802471886854 Cauldron,4000,average,0.3694904966669109, Cauldron,4000,average_rank,2.3, Cauldron,4000,chartqa_relaxed_overall,0.2184,0.008264859294607735 Cauldron,4000,docvqa_val_anls,0.40927640030259055,0.005557758057811595 Cauldron,4000,infovqa_val_anls,0.15259984907145144,0.005629341537638722 Cauldron,4000,mme_total_score,1238.5236094437776, Cauldron,4000,mmmu_val_mmmu_acc,0.26667, Cauldron,4000,mmstar_average,0.36056167686607765, Cauldron,4000,ocrbench_ocrbench_accuracy,0.414, Cauldron,4000,seedbench_seed_all,0.5240689271817677, Cauldron,4000,textvqa_val_exact_match,0.49862,0.006804563140709856 Cauldron,5000,ai2d_exact_match,0.48607512953367876,0.008995663534025174 Cauldron,5000,average,0.3715613183242104, Cauldron,5000,average_rank,2.3, Cauldron,5000,chartqa_relaxed_overall,0.2236,0.008334806752495259 Cauldron,5000,docvqa_val_anls,0.42332206291362884,0.005573327842684563 Cauldron,5000,infovqa_val_anls,0.15868297927477548,0.005670852175948406 Cauldron,5000,mme_total_score,1159.8522408963586, Cauldron,5000,mmmu_val_mmmu_acc,0.26889, Cauldron,5000,mmstar_average,0.360337335219157, Cauldron,5000,ocrbench_ocrbench_accuracy,0.401, Cauldron,5000,seedbench_seed_all,0.5198443579766537, Cauldron,5000,textvqa_val_exact_match,0.5023,0.0068036313744923 Cauldron,6000,ai2d_exact_match,0.5025906735751295,0.008999033321198393 Cauldron,6000,average,0.3678206000506273, Cauldron,6000,average_rank,2.2, Cauldron,6000,chartqa_relaxed_overall,0.2228,0.008324168469720259 Cauldron,6000,docvqa_val_anls,0.4147154618557465,0.005557478918091434 Cauldron,6000,infovqa_val_anls,0.14825798330117057,0.005517775162348899 Cauldron,6000,mme_total_score,1182.059923969588, Cauldron,6000,mmmu_val_mmmu_acc,0.27111, Cauldron,6000,mmstar_average,0.3484854117958612, Cauldron,6000,ocrbench_ocrbench_accuracy,0.391, Cauldron,6000,seedbench_seed_all,0.5185658699277377, Cauldron,6000,textvqa_val_exact_match,0.49285999999999996,0.0068052528515312825 Cauldron,7000,ai2d_exact_match,0.49838082901554404,0.008999106932714641 Cauldron,7000,average,0.3749288136256422, Cauldron,7000,average_rank,2.0, Cauldron,7000,chartqa_relaxed_overall,0.2276,0.00838733777631434 Cauldron,7000,docvqa_val_anls,0.42525461500166023,0.005595478547875609 Cauldron,7000,infovqa_val_anls,0.14305767989732765,0.005444282186253047 Cauldron,7000,mme_total_score,1262.065426170468, Cauldron,7000,mmmu_val_mmmu_acc,0.29333, Cauldron,7000,mmstar_average,0.35012603751558075, Cauldron,7000,ocrbench_ocrbench_accuracy,0.403, Cauldron,7000,seedbench_seed_all,0.5222901612006671, Cauldron,7000,textvqa_val_exact_match,0.51132,0.00682164778449453 Cauldron,8000,ai2d_exact_match,0.49028497409326427,0.008997455247470544 Cauldron,8000,average,0.3674367285685282, Cauldron,8000,average_rank,2.8, Cauldron,8000,chartqa_relaxed_overall,0.2256,0.008361209238380008 Cauldron,8000,docvqa_val_anls,0.40937518311359955,0.005568234588180622 Cauldron,8000,infovqa_val_anls,0.14953110986986237,0.005518589617885333 Cauldron,8000,mme_total_score,1210.7711084433772, Cauldron,8000,mmmu_val_mmmu_acc,0.28889, Cauldron,8000,mmstar_average,0.32742675529850473, Cauldron,8000,ocrbench_ocrbench_accuracy,0.406, Cauldron,8000,seedbench_seed_all,0.512562534741523, Cauldron,8000,textvqa_val_exact_match,0.49726000000000004,0.006823680165585169 Cauldron,9000,ai2d_exact_match,0.49287564766839376,0.008998240543632314 Cauldron,9000,average,0.3635862393983371, Cauldron,9000,average_rank,3.0, Cauldron,9000,chartqa_relaxed_overall,0.2264,0.008371693383064148 Cauldron,9000,docvqa_val_anls,0.4019142603693516,0.005557969721056488 Cauldron,9000,infovqa_val_anls,0.15576345355793061,0.005631711679425604 Cauldron,9000,mme_total_score,1161.06112444978, Cauldron,9000,mmmu_val_mmmu_acc,0.27, Cauldron,9000,mmstar_average,0.33510800699714055, Cauldron,9000,ocrbench_ocrbench_accuracy,0.401, Cauldron,9000,seedbench_seed_all,0.5066147859922179, Cauldron,9000,textvqa_val_exact_match,0.4825999999999999,0.006824717089570126 Cauldron,10000,ai2d_exact_match,0.4951424870466321,0.008998729431386465 Cauldron,10000,average,0.3613896970671388, Cauldron,10000,average_rank,3.2, Cauldron,10000,chartqa_relaxed_overall,0.2276,0.00838733777631434 Cauldron,10000,docvqa_val_anls,0.400968382089468,0.005551850287661274 Cauldron,10000,infovqa_val_anls,0.15155496077062244,0.0055346119867504375 Cauldron,10000,mme_total_score,1230.2276910764306, Cauldron,10000,mmmu_val_mmmu_acc,0.26, Cauldron,10000,mmstar_average,0.32908517910608676, Cauldron,10000,ocrbench_ocrbench_accuracy,0.395, Cauldron,10000,seedbench_seed_all,0.4972762645914397, Cauldron,10000,textvqa_val_exact_match,0.49588000000000004,0.006836984276038533 Cauldron,11000,ai2d_exact_match,0.49676165803108807,0.008998965371572357 Cauldron,11000,average,0.36198497174992383, Cauldron,11000,average_rank,3.0, Cauldron,11000,chartqa_relaxed_overall,0.2284,0.008397713059747491 Cauldron,11000,docvqa_val_anls,0.4051111426655002,0.0055740680205303966 Cauldron,11000,infovqa_val_anls,0.14954437197310022,0.005537262124650125 Cauldron,11000,mme_total_score,1210.5605242096838, Cauldron,11000,mmmu_val_mmmu_acc,0.27111, Cauldron,11000,mmstar_average,0.33316183100069335, Cauldron,11000,ocrbench_ocrbench_accuracy,0.383, Cauldron,11000,seedbench_seed_all,0.5043357420789327, Cauldron,11000,textvqa_val_exact_match,0.48644,0.006834542228525236 Cauldron,12000,ai2d_exact_match,0.5009715025906736,0.008999137132137068 Cauldron,12000,average,0.3661893496614986, Cauldron,12000,average_rank,3.2, Cauldron,12000,chartqa_relaxed_overall,0.2332,0.008459061785476934 Cauldron,12000,docvqa_val_anls,0.40826612382074784,0.0055749766883040515 Cauldron,12000,infovqa_val_anls,0.1451043668322714,0.0054346014264420334 Cauldron,12000,mme_total_score,1204.859843937575, Cauldron,12000,mmmu_val_mmmu_acc,0.29222, Cauldron,12000,mmstar_average,0.3322773065724958, Cauldron,12000,ocrbench_ocrbench_accuracy,0.386, Cauldron,12000,seedbench_seed_all,0.5047248471372985, Cauldron,12000,textvqa_val_exact_match,0.49294000000000004,0.006824466715369768 Cauldron,13000,ai2d_exact_match,0.4880181347150259,0.00899656981935399 Cauldron,13000,average,0.3609903418270159, Cauldron,13000,average_rank,3.2, Cauldron,13000,chartqa_relaxed_overall,0.23,0.008418334000200726 Cauldron,13000,docvqa_val_anls,0.39428463826041577,0.005550710740937849 Cauldron,13000,infovqa_val_anls,0.15077272156398794,0.005555043265840396 Cauldron,13000,mme_total_score,1199.0380152060825, Cauldron,13000,mmmu_val_mmmu_acc,0.27667, Cauldron,13000,mmstar_average,0.3323119954668039, Cauldron,13000,ocrbench_ocrbench_accuracy,0.39, Cauldron,13000,seedbench_seed_all,0.5000555864369094, Cauldron,13000,textvqa_val_exact_match,0.4868,0.006822203492428118 Cauldron,14000,ai2d_exact_match,0.49060880829015546,0.00899756662777987 Cauldron,14000,average,0.36202481121184005, Cauldron,14000,average_rank,2.9, Cauldron,14000,chartqa_relaxed_overall,0.2264,0.008371693383064148 Cauldron,14000,docvqa_val_anls,0.40917044569115923,0.0055666808292464285 Cauldron,14000,infovqa_val_anls,0.1424839907142797,0.0054301311838352165 Cauldron,14000,mme_total_score,1183.6356542617045, Cauldron,14000,mmmu_val_mmmu_acc,0.29, Cauldron,14000,mmstar_average,0.31528335804531843, Cauldron,14000,ocrbench_ocrbench_accuracy,0.393, Cauldron,14000,seedbench_seed_all,0.5020566981656476, Cauldron,14000,textvqa_val_exact_match,0.48922,0.006837726904596613 Cauldron,15000,ai2d_exact_match,0.4896373056994819,0.008997221155546275 Cauldron,15000,average,0.3560155869130515, Cauldron,15000,average_rank,3.2, Cauldron,15000,chartqa_relaxed_overall,0.2264,0.008371693383064148 Cauldron,15000,docvqa_val_anls,0.39997251595677663,0.0055655493795707745 Cauldron,15000,infovqa_val_anls,0.13834600428667498,0.005423970029609658 Cauldron,15000,mme_total_score,1171.8512404961984, Cauldron,15000,mmmu_val_mmmu_acc,0.27667, Cauldron,15000,mmstar_average,0.31369390041016126, Cauldron,15000,ocrbench_ocrbench_accuracy,0.385, Cauldron,15000,seedbench_seed_all,0.5010005558643691, Cauldron,15000,textvqa_val_exact_match,0.47342,0.006818885551175648 Cauldron,16000,ai2d_exact_match,0.4838082901554404,0.008994434238637765 Cauldron,16000,average,0.3566345947908368, Cauldron,16000,average_rank,3.4, Cauldron,16000,chartqa_relaxed_overall,0.22,0.008286583553358689 Cauldron,16000,docvqa_val_anls,0.40446794741098796,0.005565712054024941 Cauldron,16000,infovqa_val_anls,0.1414810779340465,0.005414255001486301 Cauldron,16000,mme_total_score,1163.921468587435, Cauldron,16000,mmmu_val_mmmu_acc,0.26444, Cauldron,16000,mmstar_average,0.3211159497904861, Cauldron,16000,ocrbench_ocrbench_accuracy,0.392, Cauldron,16000,seedbench_seed_all,0.5045580878265703, Cauldron,16000,textvqa_val_exact_match,0.47784,0.0068411071493878735 Cauldron,17000,ai2d_exact_match,0.4795984455958549,0.008991659681159872 Cauldron,17000,average,0.35664663136828295, Cauldron,17000,average_rank,3.3, Cauldron,17000,chartqa_relaxed_overall,0.2232,0.008329493152795851 Cauldron,17000,docvqa_val_anls,0.39683521379075226,0.0055483771434975925 Cauldron,17000,infovqa_val_anls,0.14519383287788715,0.005493162839439223 Cauldron,17000,mme_total_score,1216.2439975990396, Cauldron,17000,mmmu_val_mmmu_acc,0.27667, Cauldron,17000,mmstar_average,0.3294722845469949, Cauldron,17000,ocrbench_ocrbench_accuracy,0.386, Cauldron,17000,seedbench_seed_all,0.4938299055030573, Cauldron,17000,textvqa_val_exact_match,0.47902,0.006822615153700749 Cauldron,18000,ai2d_exact_match,0.48575129533678757,0.008995499260034972 Cauldron,18000,average,0.3559572601168983, Cauldron,18000,average_rank,3.3, Cauldron,18000,chartqa_relaxed_overall,0.22,0.008286583553358689 Cauldron,18000,docvqa_val_anls,0.39553075414155453,0.005560094600545488 Cauldron,18000,infovqa_val_anls,0.1441200977793978,0.005482620397489444 Cauldron,18000,mme_total_score,1146.935774309724, Cauldron,18000,mmmu_val_mmmu_acc,0.28333, Cauldron,18000,mmstar_average,0.31718334943636844, Cauldron,18000,ocrbench_ocrbench_accuracy,0.393, Cauldron,18000,seedbench_seed_all,0.49571984435797667, Cauldron,18000,textvqa_val_exact_match,0.46897999999999995,0.006834829544251984 Cauldron,19000,ai2d_exact_match,0.47506476683937826,0.00898795641911507 Cauldron,19000,average,0.35389113555756785, Cauldron,19000,average_rank,3.4, Cauldron,19000,chartqa_relaxed_overall,0.2196,0.008281169428700436 Cauldron,19000,docvqa_val_anls,0.3927677091095705,0.005557918115613283 Cauldron,19000,infovqa_val_anls,0.14242963523056748,0.005420426599891758 Cauldron,19000,mme_total_score,1156.7713085234095, Cauldron,19000,mmmu_val_mmmu_acc,0.26667, Cauldron,19000,mmstar_average,0.3300183589775604, Cauldron,19000,ocrbench_ocrbench_accuracy,0.393, Cauldron,19000,seedbench_seed_all,0.4895497498610339, Cauldron,19000,textvqa_val_exact_match,0.47591999999999995,0.0068329619195279245 Cauldron,20000,ai2d_exact_match,0.48218911917098445,0.008993442748995703 Cauldron,20000,average,0.35315414152261965, Cauldron,20000,average_rank,3.1, Cauldron,20000,chartqa_relaxed_overall,0.2228,0.008324168469720259 Cauldron,20000,docvqa_val_anls,0.3995019956467228,0.005554102577571356 Cauldron,20000,infovqa_val_anls,0.13561089161386572,0.005312619238987202 Cauldron,20000,mme_total_score,1205.715886354542, Cauldron,20000,mmmu_val_mmmu_acc,0.27667, Cauldron,20000,mmstar_average,0.3019064734976851, Cauldron,20000,ocrbench_ocrbench_accuracy,0.392, Cauldron,20000,seedbench_seed_all,0.49182879377431904, Cauldron,20000,textvqa_val_exact_match,0.4758799999999999,0.0068345144112400185 Cambrian,1000,ai2d_exact_match,0.2969559585492228,0.00822373246069825 Cambrian,1000,average,0.2927820669039429, Cambrian,1000,average_rank,2.3, Cambrian,1000,chartqa_relaxed_overall,0.3652,0.009631650506356148 Cambrian,1000,docvqa_val_anls,0.3321611875422322,0.005779917542014128 Cambrian,1000,infovqa_val_anls,0.14245417507906105,0.005737797137238206 Cambrian,1000,mme_total_score,1199.468087234894, Cambrian,1000,mmmu_val_mmmu_acc,0.24556, Cambrian,1000,mmstar_average,0.25503356223234036, Cambrian,1000,ocrbench_ocrbench_accuracy,0.257, Cambrian,1000,seedbench_seed_all,0.3486937187326292, Cambrian,1000,textvqa_val_exact_match,0.39198,0.0066503820519040295 Cambrian,2000,ai2d_exact_match,0.36204663212435234,0.008649846657326264 Cambrian,2000,average,0.34977426052091565, Cambrian,2000,average_rank,2.3, Cambrian,2000,chartqa_relaxed_overall,0.4272,0.009895414680177737 Cambrian,2000,docvqa_val_anls,0.4044005302893221,0.006099745172446295 Cambrian,2000,infovqa_val_anls,0.16067123444748188,0.005906486800204124 Cambrian,2000,mme_total_score,1191.6502601040415, Cambrian,2000,mmmu_val_mmmu_acc,0.27, Cambrian,2000,mmstar_average,0.3140124492167455, Cambrian,2000,ocrbench_ocrbench_accuracy,0.293, Cambrian,2000,seedbench_seed_all,0.4954974986103391, Cambrian,2000,textvqa_val_exact_match,0.42113999999999996,0.006720777771268006 Cambrian,3000,ai2d_exact_match,0.3954015544041451,0.008800034697838395 Cambrian,3000,average,0.36894910100121225, Cambrian,3000,average_rank,1.9, Cambrian,3000,chartqa_relaxed_overall,0.4512,0.00995424828018316 Cambrian,3000,docvqa_val_anls,0.4317442116227413,0.006203480507897517 Cambrian,3000,infovqa_val_anls,0.17555075927653038,0.006227695613801885 Cambrian,3000,mme_total_score,1311.187975190076, Cambrian,3000,mmmu_val_mmmu_acc,0.28222, Cambrian,3000,mmstar_average,0.3241666733128301, Cambrian,3000,ocrbench_ocrbench_accuracy,0.289, Cambrian,3000,seedbench_seed_all,0.5216787103946637, Cambrian,3000,textvqa_val_exact_match,0.4495799999999999,0.006762330259763156 Cambrian,4000,ai2d_exact_match,0.3960492227979275,0.00880252039912977 Cambrian,4000,average,0.38270567946732525, Cambrian,4000,average_rank,2.2, Cambrian,4000,chartqa_relaxed_overall,0.4764,0.009990852959439592 Cambrian,4000,docvqa_val_anls,0.46350742276594625,0.006276498296530657 Cambrian,4000,infovqa_val_anls,0.17819320935276328,0.006230849386066924 Cambrian,4000,mme_total_score,1239.0667266906762, Cambrian,4000,mmmu_val_mmmu_acc,0.26778, Cambrian,4000,mmstar_average,0.3298927333298682, Cambrian,4000,ocrbench_ocrbench_accuracy,0.334, Cambrian,4000,seedbench_seed_all,0.5273485269594219, Cambrian,4000,textvqa_val_exact_match,0.47118000000000004,0.0067854764061200295 Cambrian,5000,ai2d_exact_match,0.40382124352331605,0.00883109414387431 Cambrian,5000,average,0.3896927239658996, Cambrian,5000,average_rank,2.2, Cambrian,5000,chartqa_relaxed_overall,0.4912,0.01000045137036546 Cambrian,5000,docvqa_val_anls,0.47067674424138894,0.006257580396259991 Cambrian,5000,infovqa_val_anls,0.19432385292037085,0.00653326869729313 Cambrian,5000,mme_total_score,1214.843337334934, Cambrian,5000,mmmu_val_mmmu_acc,0.26556, Cambrian,5000,mmstar_average,0.3255942091936794, Cambrian,5000,ocrbench_ocrbench_accuracy,0.348, Cambrian,5000,seedbench_seed_all,0.5292384658143413, Cambrian,5000,textvqa_val_exact_match,0.47881999999999997,0.0067962283116337965 Cambrian,6000,ai2d_exact_match,0.4183937823834197,0.00887848400426025 Cambrian,6000,average,0.39990121640985093, Cambrian,6000,average_rank,2.4, Cambrian,6000,chartqa_relaxed_overall,0.5048,0.010001539697392967 Cambrian,6000,docvqa_val_anls,0.5016482570925722,0.006248476976439708 Cambrian,6000,infovqa_val_anls,0.19206925076752404,0.006399951499514914 Cambrian,6000,mme_total_score,1176.5368147258905, Cambrian,6000,mmmu_val_mmmu_acc,0.26667, Cambrian,6000,mmstar_average,0.33910121942401966, Cambrian,6000,ocrbench_ocrbench_accuracy,0.349, Cambrian,6000,seedbench_seed_all,0.5391884380211228, Cambrian,6000,textvqa_val_exact_match,0.48823999999999995,0.006792935247288521 Cambrian,7000,ai2d_exact_match,0.4326424870466321,0.008917121282993509 Cambrian,7000,average,0.40874111160527243, Cambrian,7000,average_rank,2.2, Cambrian,7000,chartqa_relaxed_overall,0.5088,0.01000045137036546 Cambrian,7000,docvqa_val_anls,0.5036441729071615,0.006331057466984081 Cambrian,7000,infovqa_val_anls,0.21047690542452482,0.0067248622097179815 Cambrian,7000,mme_total_score,1226.7814125650261, Cambrian,7000,mmmu_val_mmmu_acc,0.29, Cambrian,7000,mmstar_average,0.338458434622219, Cambrian,7000,ocrbench_ocrbench_accuracy,0.366, Cambrian,7000,seedbench_seed_all,0.5344080044469149, Cambrian,7000,textvqa_val_exact_match,0.49423999999999996,0.006789004536492761 Cambrian,8000,ai2d_exact_match,0.4375,0.008928571428571428 Cambrian,8000,average,0.4145399236017655, Cambrian,8000,average_rank,2.2, Cambrian,8000,chartqa_relaxed_overall,0.5312,0.009982508912777261 Cambrian,8000,docvqa_val_anls,0.5139425879433994,0.006316907313170543 Cambrian,8000,infovqa_val_anls,0.20402472511542052,0.00665285157736885 Cambrian,8000,mme_total_score,1243.7800120048018, Cambrian,8000,mmmu_val_mmmu_acc,0.28222, Cambrian,8000,mmstar_average,0.3300028831814166, Cambrian,8000,ocrbench_ocrbench_accuracy,0.397, Cambrian,8000,seedbench_seed_all,0.5364091161756531, Cambrian,8000,textvqa_val_exact_match,0.49855999999999995,0.006793174127235705 Cambrian,9000,ai2d_exact_match,0.4251943005181347,0.008897867521411106 Cambrian,9000,average,0.41587431550154147, Cambrian,9000,average_rank,2.0, Cambrian,9000,chartqa_relaxed_overall,0.5316,0.009982005418395102 Cambrian,9000,docvqa_val_anls,0.524278096798472,0.006327817979288962 Cambrian,9000,infovqa_val_anls,0.2075069347958689,0.006574086714467312 Cambrian,9000,mme_total_score,1196.0997398959585, Cambrian,9000,mmmu_val_mmmu_acc,0.28556, Cambrian,9000,mmstar_average,0.33833745626187595, Cambrian,9000,ocrbench_ocrbench_accuracy,0.381, Cambrian,9000,seedbench_seed_all,0.5456920511395219, Cambrian,9000,textvqa_val_exact_match,0.5036999999999999,0.006790970877355565 Cambrian,10000,ai2d_exact_match,0.44559585492227977,0.008945723914357835 Cambrian,10000,average,0.41659534392300923, Cambrian,10000,average_rank,2.0, Cambrian,10000,chartqa_relaxed_overall,0.5416,0.00996732235888869 Cambrian,10000,docvqa_val_anls,0.5215772912722147,0.006314944464077694 Cambrian,10000,infovqa_val_anls,0.18925972424188112,0.006302599390246784 Cambrian,10000,mme_total_score,1241.6579631852742, Cambrian,10000,mmmu_val_mmmu_acc,0.27889, Cambrian,10000,mmstar_average,0.34495128935097424, Cambrian,10000,ocrbench_ocrbench_accuracy,0.373, Cambrian,10000,seedbench_seed_all,0.5510839355197332, Cambrian,10000,textvqa_val_exact_match,0.5034000000000001,0.0067932111363852585 Cambrian,11000,ai2d_exact_match,0.4481865284974093,0.008950704796242765 Cambrian,11000,average,0.42096531591252645, Cambrian,11000,average_rank,2.0, Cambrian,11000,chartqa_relaxed_overall,0.5388,0.0099718403035556 Cambrian,11000,docvqa_val_anls,0.5266496382012209,0.006315639724937912 Cambrian,11000,infovqa_val_anls,0.210453542763111,0.006757501751011823 Cambrian,11000,mme_total_score,1288.1182472989194, Cambrian,11000,mmmu_val_mmmu_acc,0.28556, Cambrian,11000,mmstar_average,0.33813173019346515, Cambrian,11000,ocrbench_ocrbench_accuracy,0.372, Cambrian,11000,seedbench_seed_all,0.547526403557532, Cambrian,11000,textvqa_val_exact_match,0.5213800000000001,0.00677771101429669 Cambrian,12000,ai2d_exact_match,0.4566062176165803,0.008965198879336198 Cambrian,12000,average,0.42647137409223257, Cambrian,12000,average_rank,2.1, Cambrian,12000,chartqa_relaxed_overall,0.5488,0.00995424828018316 Cambrian,12000,docvqa_val_anls,0.5432685128640529,0.006286968775744768 Cambrian,12000,infovqa_val_anls,0.214068867667478,0.006728697021311144 Cambrian,12000,mme_total_score,1272.0885354141656, Cambrian,12000,mmmu_val_mmmu_acc,0.27556, Cambrian,12000,mmstar_average,0.3364706975313428, Cambrian,12000,ocrbench_ocrbench_accuracy,0.396, Cambrian,12000,seedbench_seed_all,0.5505280711506393, Cambrian,12000,textvqa_val_exact_match,0.51694,0.00676817323313926 Cambrian,13000,ai2d_exact_match,0.44591968911917096,0.008946359966425538 Cambrian,13000,average,0.42595033048849396, Cambrian,13000,average_rank,2.1, Cambrian,13000,chartqa_relaxed_overall,0.5484,0.009955029736109216 Cambrian,13000,docvqa_val_anls,0.5438384263330651,0.006322105329987294 Cambrian,13000,infovqa_val_anls,0.2206834922799479,0.006931006985711701 Cambrian,13000,mme_total_score,1294.3567426970787, Cambrian,13000,mmmu_val_mmmu_acc,0.27889, Cambrian,13000,mmstar_average,0.3258043460972802, Cambrian,13000,ocrbench_ocrbench_accuracy,0.404, Cambrian,13000,seedbench_seed_all,0.5466370205669816, Cambrian,13000,textvqa_val_exact_match,0.5193800000000001,0.006779976160381913 Cambrian,14000,ai2d_exact_match,0.452720207253886,0.00895883074213608 Cambrian,14000,average,0.4290628718702856, Cambrian,14000,average_rank,2.2, Cambrian,14000,chartqa_relaxed_overall,0.5624,0.009923804147377265 Cambrian,14000,docvqa_val_anls,0.5501582985035621,0.006289139790552158 Cambrian,14000,infovqa_val_anls,0.2108586833777777,0.006694603397438603 Cambrian,14000,mme_total_score,1258.3851540616247, Cambrian,14000,mmmu_val_mmmu_acc,0.28444, Cambrian,14000,mmstar_average,0.3392338272359765, Cambrian,14000,ocrbench_ocrbench_accuracy,0.391, Cambrian,14000,seedbench_seed_all,0.5506948304613675, Cambrian,14000,textvqa_val_exact_match,0.5200600000000001,0.006762031077483937 Cambrian,15000,ai2d_exact_match,0.4575777202072539,0.008966704964444827 Cambrian,15000,average,0.4277300448618869, Cambrian,15000,average_rank,2.2, Cambrian,15000,chartqa_relaxed_overall,0.5572,0.009936335154498413 Cambrian,15000,docvqa_val_anls,0.550106577844955,0.006305789516584643 Cambrian,15000,infovqa_val_anls,0.2065365477570411,0.006585265308234506 Cambrian,15000,mme_total_score,1191.499399759904, Cambrian,15000,mmmu_val_mmmu_acc,0.27667, Cambrian,15000,mmstar_average,0.3287834934674655, Cambrian,15000,ocrbench_ocrbench_accuracy,0.403, Cambrian,15000,seedbench_seed_all,0.5489160644802669, Cambrian,15000,textvqa_val_exact_match,0.52078,0.006761241098810132 Cambrian,16000,ai2d_exact_match,0.45174870466321243,0.008957152666985158 Cambrian,16000,average,0.4283932783055524, Cambrian,16000,average_rank,2.0, Cambrian,16000,chartqa_relaxed_overall,0.566,0.00991448025705367 Cambrian,16000,docvqa_val_anls,0.5507111549470696,0.006298722691255348 Cambrian,16000,infovqa_val_anls,0.21185403234992514,0.0065982885956266755 Cambrian,16000,mme_total_score,1242.7407963185274, Cambrian,16000,mmmu_val_mmmu_acc,0.28111, Cambrian,16000,mmstar_average,0.32560559611383355, Cambrian,16000,ocrbench_ocrbench_accuracy,0.394, Cambrian,16000,seedbench_seed_all,0.5540300166759311, Cambrian,16000,textvqa_val_exact_match,0.5204799999999999,0.006783488561456611 Cambrian,17000,ai2d_exact_match,0.4585492227979275,0.008968176705111413 Cambrian,17000,average,0.43044446070382536, Cambrian,17000,average_rank,2.4, Cambrian,17000,chartqa_relaxed_overall,0.5656,0.009915542506251351 Cambrian,17000,docvqa_val_anls,0.5528747665552118,0.006300095973166064 Cambrian,17000,infovqa_val_anls,0.20960594545383252,0.0066643358201217045 Cambrian,17000,mme_total_score,1292.4750900360143, Cambrian,17000,mmmu_val_mmmu_acc,0.27111, Cambrian,17000,mmstar_average,0.3297184661133375, Cambrian,17000,ocrbench_ocrbench_accuracy,0.409, Cambrian,17000,seedbench_seed_all,0.555141745414119, Cambrian,17000,textvqa_val_exact_match,0.5224,0.006774129151791618 Cambrian,18000,ai2d_exact_match,0.4523963730569948,0.008958275210820045 Cambrian,18000,average,0.43086034100304976, Cambrian,18000,average_rank,2.4, Cambrian,18000,chartqa_relaxed_overall,0.566,0.00991448025705367 Cambrian,18000,docvqa_val_anls,0.5527950768923724,0.006311862091164367 Cambrian,18000,infovqa_val_anls,0.21943552260393814,0.006848865968629337 Cambrian,18000,mme_total_score,1271.4629851940776, Cambrian,18000,mmmu_val_mmmu_acc,0.28333, Cambrian,18000,mmstar_average,0.3399009269355101, Cambrian,18000,ocrbench_ocrbench_accuracy,0.403, Cambrian,18000,seedbench_seed_all,0.5493051695386326, Cambrian,18000,textvqa_val_exact_match,0.5115799999999999,0.0067870754820260944 Cambrian,19000,ai2d_exact_match,0.45012953367875647,0.008954279299902583 Cambrian,19000,average,0.43057935657557483, Cambrian,19000,average_rank,2.2, Cambrian,19000,chartqa_relaxed_overall,0.5704,0.009902361269085337 Cambrian,19000,docvqa_val_anls,0.5526262050544066,0.006310038331338026 Cambrian,19000,infovqa_val_anls,0.21937034023427093,0.006858602078113178 Cambrian,19000,mme_total_score,1269.9476790716285, Cambrian,19000,mmmu_val_mmmu_acc,0.28556, Cambrian,19000,mmstar_average,0.3314266960826673, Cambrian,19000,ocrbench_ocrbench_accuracy,0.404, Cambrian,19000,seedbench_seed_all,0.5465814341300722, Cambrian,19000,textvqa_val_exact_match,0.51512,0.006773909823053313 Cambrian,20000,ai2d_exact_match,0.45531088082901555,0.008963137311190377 Cambrian,20000,average,0.42817340693945505, Cambrian,20000,average_rank,2.4, Cambrian,20000,chartqa_relaxed_overall,0.5684,0.009907968668564455 Cambrian,20000,docvqa_val_anls,0.549188563518089,0.006325944032596611 Cambrian,20000,infovqa_val_anls,0.21755406764942647,0.0068363256354831885 Cambrian,20000,mme_total_score,1290.6296518607442, Cambrian,20000,mmmu_val_mmmu_acc,0.28444, Cambrian,20000,mmstar_average,0.32485343172593534, Cambrian,20000,ocrbench_ocrbench_accuracy,0.392, Cambrian,20000,seedbench_seed_all,0.5486937187326293, Cambrian,20000,textvqa_val_exact_match,0.51312,0.006789609184524225 LLaVa,1000,ai2d_exact_match,0.25777202072538863,0.007872600874396432 LLaVa,1000,average,0.2581360512843851, LLaVa,1000,average_rank,3.0, LLaVa,1000,chartqa_relaxed_overall,0.1576,0.007288768514542319 LLaVa,1000,docvqa_val_anls,0.2850280465017524,0.005237571860745478 LLaVa,1000,infovqa_val_anls,0.15291302898150733,0.005597827181699182 LLaVa,1000,mme_total_score,844.0894357743098, LLaVa,1000,mmmu_val_mmmu_acc,0.25333, LLaVa,1000,mmstar_average,0.22969486173769915, LLaVa,1000,ocrbench_ocrbench_accuracy,0.35, LLaVa,1000,seedbench_seed_all,0.2717065036131184, LLaVa,1000,textvqa_val_exact_match,0.36518,0.006561838543046682 LLaVa,2000,ai2d_exact_match,0.24676165803108807,0.007759553547248649 LLaVa,2000,average,0.28023175511348764, LLaVa,2000,average_rank,3.2, LLaVa,2000,chartqa_relaxed_overall,0.19,0.007847587772910948 LLaVa,2000,docvqa_val_anls,0.31839133336930814,0.005353711170722305 LLaVa,2000,infovqa_val_anls,0.1625232406439703,0.005680709103352321 LLaVa,2000,mme_total_score,677.0834333733493, LLaVa,2000,mmmu_val_mmmu_acc,0.25111, LLaVa,2000,mmstar_average,0.2602226545829147, LLaVa,2000,ocrbench_ocrbench_accuracy,0.389, LLaVa,2000,seedbench_seed_all,0.2864369093941078, LLaVa,2000,textvqa_val_exact_match,0.41764000000000007,0.006695635323587844 LLaVa,3000,ai2d_exact_match,0.31541450777202074,0.00836346730591157 LLaVa,3000,average,0.3241247472461608, LLaVa,3000,average_rank,3.1, LLaVa,3000,chartqa_relaxed_overall,0.2048,0.008072722684486087 LLaVa,3000,docvqa_val_anls,0.33927313841893186,0.005424261898744584 LLaVa,3000,infovqa_val_anls,0.17400826017663457,0.005878416771815313 LLaVa,3000,mme_total_score,674.5895358143258, LLaVa,3000,mmmu_val_mmmu_acc,0.27778, LLaVa,3000,mmstar_average,0.28839612401739867, LLaVa,3000,ocrbench_ocrbench_accuracy,0.428, LLaVa,3000,seedbench_seed_all,0.4512506948304614, LLaVa,3000,textvqa_val_exact_match,0.4382,0.006743326070219196 LLaVa,4000,ai2d_exact_match,0.30667098445595853,0.008299228398743067 LLaVa,4000,average,0.34151562451124173, LLaVa,4000,average_rank,2.8, LLaVa,4000,chartqa_relaxed_overall,0.2168,0.00824295350666284 LLaVa,4000,docvqa_val_anls,0.36894439928615425,0.005583877165382837 LLaVa,4000,infovqa_val_anls,0.1815741433661475,0.005975096001960774 LLaVa,4000,mme_total_score,660.3387354941976, LLaVa,4000,mmmu_val_mmmu_acc,0.29444, LLaVa,4000,mmstar_average,0.3089940618086463, LLaVa,4000,ocrbench_ocrbench_accuracy,0.439, LLaVa,4000,seedbench_seed_all,0.48265703168426904, LLaVa,4000,textvqa_val_exact_match,0.4745599999999999,0.006778004835488831 LLaVa,5000,ai2d_exact_match,0.3176813471502591,0.00837955903737489 LLaVa,5000,average,0.3488971740226244, LLaVa,5000,average_rank,2.9, LLaVa,5000,chartqa_relaxed_overall,0.2076,0.008113397986710395 LLaVa,5000,docvqa_val_anls,0.37667351380566144,0.005504553709162657 LLaVa,5000,infovqa_val_anls,0.19157302816202296,0.006066754825254386 LLaVa,5000,mme_total_score,596.045218087235, LLaVa,5000,mmmu_val_mmmu_acc,0.28889, LLaVa,5000,mmstar_average,0.30911460927022283, LLaVa,5000,ocrbench_ocrbench_accuracy,0.471, LLaVa,5000,seedbench_seed_all,0.49972206781545303, LLaVa,5000,textvqa_val_exact_match,0.47781999999999997,0.00678922884027701 LLaVa,6000,ai2d_exact_match,0.3626943005181347,0.00865318426683941 LLaVa,6000,average,0.35336013036474917, LLaVa,6000,average_rank,3.3, LLaVa,6000,chartqa_relaxed_overall,0.2164,0.00823744852629073 LLaVa,6000,docvqa_val_anls,0.3796381971300078,0.005512363416378596 LLaVa,6000,infovqa_val_anls,0.1911083172357537,0.00606756561226675 LLaVa,6000,mme_total_score,751.7179871948779, LLaVa,6000,mmmu_val_mmmu_acc,0.27111, LLaVa,6000,mmstar_average,0.3230226430014031, LLaVa,6000,ocrbench_ocrbench_accuracy,0.471, LLaVa,6000,seedbench_seed_all,0.49788771539744303, LLaVa,6000,textvqa_val_exact_match,0.46738,0.006777431212101451 LLaVa,7000,ai2d_exact_match,0.3636658031088083,0.008658158841882565 LLaVa,7000,average,0.36232264653787655, LLaVa,7000,average_rank,3.4, LLaVa,7000,chartqa_relaxed_overall,0.2276,0.00838733777631434 LLaVa,7000,docvqa_val_anls,0.38862032747814834,0.005554025202613156 LLaVa,7000,infovqa_val_anls,0.1987523491607365,0.006169459873730798 LLaVa,7000,mme_total_score,700.0341136454582, LLaVa,7000,mmmu_val_mmmu_acc,0.28, LLaVa,7000,mmstar_average,0.32238002502982693, LLaVa,7000,ocrbench_ocrbench_accuracy,0.469, LLaVa,7000,seedbench_seed_all,0.5175653140633686, LLaVa,7000,textvqa_val_exact_match,0.49332,0.006784414578741135 LLaVa,8000,ai2d_exact_match,0.38244818652849744,0.008746910624026853 LLaVa,8000,average,0.36916094621046264, LLaVa,8000,average_rank,2.8, LLaVa,8000,chartqa_relaxed_overall,0.2276,0.00838733777631434 LLaVa,8000,docvqa_val_anls,0.4000384036155175,0.005647492303754258 LLaVa,8000,infovqa_val_anls,0.20267340215584623,0.006186451136703468 LLaVa,8000,mme_total_score,787.0998399359744, LLaVa,8000,mmmu_val_mmmu_acc,0.28333, LLaVa,8000,mmstar_average,0.33877512170436386, LLaVa,8000,ocrbench_ocrbench_accuracy,0.47, LLaVa,8000,seedbench_seed_all,0.5221234018899389, LLaVa,8000,textvqa_val_exact_match,0.49546,0.006796875545678079 LLaVa,9000,ai2d_exact_match,0.3856865284974093,0.008760803506529557 LLaVa,9000,average,0.3660729124456708, LLaVa,9000,average_rank,3.0, LLaVa,9000,chartqa_relaxed_overall,0.2212,0.00830275847651416 LLaVa,9000,docvqa_val_anls,0.3961556104365206,0.005555787005997977 LLaVa,9000,infovqa_val_anls,0.20795411138332273,0.006302696156883479 LLaVa,9000,mme_total_score,697.6510604241697, LLaVa,9000,mmmu_val_mmmu_acc,0.27444, LLaVa,9000,mmstar_average,0.33019217959261743, LLaVa,9000,ocrbench_ocrbench_accuracy,0.47, LLaVa,9000,seedbench_seed_all,0.5140077821011673, LLaVa,9000,textvqa_val_exact_match,0.49501999999999996,0.006795224421237829 LLaVa,10000,ai2d_exact_match,0.3636658031088083,0.008658158841882561 LLaVa,10000,average,0.36465272894871764, LLaVa,10000,average_rank,3.1, LLaVa,10000,chartqa_relaxed_overall,0.2216,0.008308127706914342 LLaVa,10000,docvqa_val_anls,0.3905169927438113,0.005559588309122447 LLaVa,10000,infovqa_val_anls,0.210842797817216,0.0062742161273205005 LLaVa,10000,mme_total_score,710.1757703081232, LLaVa,10000,mmmu_val_mmmu_acc,0.25667, LLaVa,10000,mmstar_average,0.33485115141559363, LLaVa,10000,ocrbench_ocrbench_accuracy,0.484, LLaVa,10000,seedbench_seed_all,0.5220678154530295, LLaVa,10000,textvqa_val_exact_match,0.49766000000000005,0.0067820722630208075 LLaVa,11000,ai2d_exact_match,0.3539507772020725,0.008606685322379343 LLaVa,11000,average,0.3619647158138698, LLaVa,11000,average_rank,3.3, LLaVa,11000,chartqa_relaxed_overall,0.226,0.008366456779283321 LLaVa,11000,docvqa_val_anls,0.39615321520069524,0.0055548098783566 LLaVa,11000,infovqa_val_anls,0.20231707967850712,0.006189706400735626 LLaVa,11000,mme_total_score,620.8629451780713, LLaVa,11000,mmmu_val_mmmu_acc,0.26778, LLaVa,11000,mmstar_average,0.3504522318333254, LLaVa,11000,ocrbench_ocrbench_accuracy,0.48, LLaVa,11000,seedbench_seed_all,0.5084491384102279, LLaVa,11000,textvqa_val_exact_match,0.47257999999999994,0.0067942373414689025 LLaVa,12000,ai2d_exact_match,0.3963730569948187,0.008803757198545707 LLaVa,12000,average,0.36835635606525785, LLaVa,12000,average_rank,3.1, LLaVa,12000,chartqa_relaxed_overall,0.234,0.008469137530835504 LLaVa,12000,docvqa_val_anls,0.3998087503562603,0.005606788206948343 LLaVa,12000,infovqa_val_anls,0.19486992137918643,0.006137557366661157 LLaVa,12000,mme_total_score,707.7871148459384, LLaVa,12000,mmmu_val_mmmu_acc,0.26444, LLaVa,12000,mmstar_average,0.34510216846405867, LLaVa,12000,ocrbench_ocrbench_accuracy,0.466, LLaVa,12000,seedbench_seed_all,0.5159533073929962, LLaVa,12000,textvqa_val_exact_match,0.49866000000000005,0.006787787245571138 LLaVa,13000,ai2d_exact_match,0.37661917098445596,0.008720866089740391 LLaVa,13000,average,0.3660925061677603, LLaVa,13000,average_rank,3.2, LLaVa,13000,chartqa_relaxed_overall,0.23,0.008418334000200726 LLaVa,13000,docvqa_val_anls,0.39678037656395876,0.005562201990102385 LLaVa,13000,infovqa_val_anls,0.20007389352596994,0.006181717086032354 LLaVa,13000,mme_total_score,762.4510804321728, LLaVa,13000,mmmu_val_mmmu_acc,0.26111, LLaVa,13000,mmstar_average,0.3487764851969923, LLaVa,13000,ocrbench_ocrbench_accuracy,0.487, LLaVa,13000,seedbench_seed_all,0.5187326292384659, LLaVa,13000,textvqa_val_exact_match,0.47573999999999994,0.006786037174972445 LLaVa,14000,ai2d_exact_match,0.40382124352331605,0.008831094143874325 LLaVa,14000,average,0.3665520961603681, LLaVa,14000,average_rank,3.5, LLaVa,14000,chartqa_relaxed_overall,0.224,0.0083401092900026 LLaVa,14000,docvqa_val_anls,0.39653795108545226,0.0055480083540036754 LLaVa,14000,infovqa_val_anls,0.1966338205713239,0.006145830112184984 LLaVa,14000,mme_total_score,648.8810524209684, LLaVa,14000,mmmu_val_mmmu_acc,0.27222, LLaVa,14000,mmstar_average,0.3348780070169728, LLaVa,14000,ocrbench_ocrbench_accuracy,0.482, LLaVa,14000,seedbench_seed_all,0.5121178432462479, LLaVa,14000,textvqa_val_exact_match,0.47676,0.006784540255411228 LLaVa,15000,ai2d_exact_match,0.38374352331606215,0.008752516998880439 LLaVa,15000,average,0.3656314014070533, LLaVa,15000,average_rank,3.3, LLaVa,15000,chartqa_relaxed_overall,0.222,0.008313485768211027 LLaVa,15000,docvqa_val_anls,0.3956148602850384,0.005571289516040145 LLaVa,15000,infovqa_val_anls,0.2003939669503818,0.006205919365204143 LLaVa,15000,mme_total_score,744.8995598239295, LLaVa,15000,mmmu_val_mmmu_acc,0.25111, LLaVa,15000,mmstar_average,0.34431451447442113, LLaVa,15000,ocrbench_ocrbench_accuracy,0.491, LLaVa,15000,seedbench_seed_all,0.5223457476375765, LLaVa,15000,textvqa_val_exact_match,0.48016000000000003,0.006780152577471598 LLaVa,16000,ai2d_exact_match,0.38244818652849744,0.008746910624026851 LLaVa,16000,average,0.3664952284054124, LLaVa,16000,average_rank,3.1, LLaVa,16000,chartqa_relaxed_overall,0.2272,0.008382133861209024 LLaVa,16000,docvqa_val_anls,0.3971604594021061,0.005596507964441207 LLaVa,16000,infovqa_val_anls,0.20130541865614268,0.006177273754737603 LLaVa,16000,mme_total_score,741.5084033613446, LLaVa,16000,mmmu_val_mmmu_acc,0.25444, LLaVa,16000,mmstar_average,0.34322789378570057, LLaVa,16000,ocrbench_ocrbench_accuracy,0.488, LLaVa,16000,seedbench_seed_all,0.5151750972762645, LLaVa,16000,textvqa_val_exact_match,0.4895,0.0067890182024819105 LLaVa,17000,ai2d_exact_match,0.36852331606217614,0.008682460781863906 LLaVa,17000,average,0.3659850040618015, LLaVa,17000,average_rank,3.0, LLaVa,17000,chartqa_relaxed_overall,0.2264,0.008371693383064148 LLaVa,17000,docvqa_val_anls,0.3895535425900796,0.005559420230793686 LLaVa,17000,infovqa_val_anls,0.19870913061640477,0.0061833458200064835 LLaVa,17000,mme_total_score,738.0654261704681, LLaVa,17000,mmmu_val_mmmu_acc,0.27667, LLaVa,17000,mmstar_average,0.3488362957589257, LLaVa,17000,ocrbench_ocrbench_accuracy,0.486, LLaVa,17000,seedbench_seed_all,0.514952751528627, LLaVa,17000,textvqa_val_exact_match,0.48422,0.006797929147037179 LLaVa,18000,ai2d_exact_match,0.3785621761658031,0.008729696327646351 LLaVa,18000,average,0.3667559662544118, LLaVa,18000,average_rank,3.1, LLaVa,18000,chartqa_relaxed_overall,0.2268,0.008376919070233621 LLaVa,18000,docvqa_val_anls,0.39054490192374947,0.005557124380968682 LLaVa,18000,infovqa_val_anls,0.19983100041999644,0.006171606410532323 LLaVa,18000,mme_total_score,746.5269107643057, LLaVa,18000,mmmu_val_mmmu_acc,0.27, LLaVa,18000,mmstar_average,0.3522401814266279, LLaVa,18000,ocrbench_ocrbench_accuracy,0.497, LLaVa,18000,seedbench_seed_all,0.5137854363535297, LLaVa,18000,textvqa_val_exact_match,0.47203999999999996,0.006793178720998519 LLaVa,19000,ai2d_exact_match,0.3707901554404145,0.008693477555877339 LLaVa,19000,average,0.3627892845719615, LLaVa,19000,average_rank,3.2, LLaVa,19000,chartqa_relaxed_overall,0.2284,0.008397713059747491 LLaVa,19000,docvqa_val_anls,0.3886627325813464,0.005572189741680524 LLaVa,19000,infovqa_val_anls,0.18766806187395813,0.006047287494792444 LLaVa,19000,mme_total_score,735.0644257703082, LLaVa,19000,mmmu_val_mmmu_acc,0.27556, LLaVa,19000,mmstar_average,0.34617955399790473, LLaVa,19000,ocrbench_ocrbench_accuracy,0.487, LLaVa,19000,seedbench_seed_all,0.50550305725403, LLaVa,19000,textvqa_val_exact_match,0.47534,0.00678734045691651 LLaVa,20000,ai2d_exact_match,0.3746761658031088,0.008711886524907501 LLaVa,20000,average,0.3636232406961286, LLaVa,20000,average_rank,3.3, LLaVa,20000,chartqa_relaxed_overall,0.2224,0.00831883268198588 LLaVa,20000,docvqa_val_anls,0.3865323770909091,0.005551659686181904 LLaVa,20000,infovqa_val_anls,0.1967140503390298,0.006138459642690392 LLaVa,20000,mme_total_score,688.5517206882753, LLaVa,20000,mmmu_val_mmmu_acc,0.27556, LLaVa,20000,mmstar_average,0.3525069399025931, LLaVa,20000,ocrbench_ocrbench_accuracy,0.494, LLaVa,20000,seedbench_seed_all,0.5113396331295164, LLaVa,20000,textvqa_val_exact_match,0.45888,0.006775175991953595