FineVision / app /src /content /assets /data /internal_deduplication.csv
lusxvr's picture
new plots
a024e38
raw
history blame
47.1 kB
run,step,metric,value,stderr
Baseline,300,ai2d_exact_match,0.2551813471502591,0.007846598309236504
Baseline,300,average,0.1836384379377178,
Baseline,300,average_rank,1.4444444444444444,
Baseline,300,chartqa_relaxed_overall,0.1328,0.006788526912302523
Baseline,300,docvqa_val_anls,0.1503143424142802,0.004151727384820528
Baseline,300,infovqa_val_anls,0.11374396685909084,0.005163280990095591
Baseline,300,mme_total_score,691.1952781112445,
Baseline,300,mmmu_val_mmmu_acc,0.26556,
Baseline,300,mmstar_average,0.2859278470781123,
Baseline,300,ocrbench_ocrbench_accuracy,0.149,
Baseline,300,textvqa_val_exact_match,0.11657999999999999,0.004405144921606561
Baseline,1500,ai2d_exact_match,0.27525906735751293,0.008038849490577982
Baseline,1500,average,0.318819844462715,
Baseline,1500,average_rank,1.2222222222222223,
Baseline,1500,chartqa_relaxed_overall,0.374,0.009679208378267924
Baseline,1500,docvqa_val_anls,0.437411196849637,0.0061765544267728045
Baseline,1500,infovqa_val_anls,0.21582289145457856,0.006873661480889723
Baseline,1500,mme_total_score,1066.704581832733,
Baseline,1500,mmmu_val_mmmu_acc,0.24,
Baseline,1500,mmstar_average,0.23474560003999134,
Baseline,1500,ocrbench_ocrbench_accuracy,0.411,
Baseline,1500,textvqa_val_exact_match,0.36232000000000003,0.006579840604488538
Baseline,2700,ai2d_exact_match,0.27849740932642486,0.008067913113285858
Baseline,2700,average,0.36471172748595665,
Baseline,2700,average_rank,1.4444444444444444,
Baseline,2700,chartqa_relaxed_overall,0.4624,0.00997367964766694
Baseline,2700,docvqa_val_anls,0.4953558755845657,0.006275075768152338
Baseline,2700,infovqa_val_anls,0.20975551937756792,0.006468441430093479
Baseline,2700,mme_total_score,1172.469887955182,
Baseline,2700,mmmu_val_mmmu_acc,0.27111,
Baseline,2700,mmstar_average,0.2503150155990948,
Baseline,2700,ocrbench_ocrbench_accuracy,0.486,
Baseline,2700,textvqa_val_exact_match,0.46426000000000006,0.006792330795207658
Baseline,3900,ai2d_exact_match,0.35038860103626945,0.008586842325753156
Baseline,3900,average,0.398537125609502,
Baseline,3900,average_rank,1.4444444444444444,
Baseline,3900,chartqa_relaxed_overall,0.4948,0.010001459677380663
Baseline,3900,docvqa_val_anls,0.5407649774017467,0.00626354456311192
Baseline,3900,infovqa_val_anls,0.22943878312324553,0.006664668392753554
Baseline,3900,mme_total_score,1168.9393757503,
Baseline,3900,mmmu_val_mmmu_acc,0.27,
Baseline,3900,mmstar_average,0.3015046433147543,
Baseline,3900,ocrbench_ocrbench_accuracy,0.517,
Baseline,3900,textvqa_val_exact_match,0.4844,0.006794038548018284
Baseline,5100,ai2d_exact_match,0.3898963730569948,0.008778252852376944
Baseline,5100,average,0.42767475240113806,
Baseline,5100,average_rank,1.2222222222222223,
Baseline,5100,chartqa_relaxed_overall,0.5264,0.009988048880946633
Baseline,5100,docvqa_val_anls,0.5781350651939515,0.006244324391533268
Baseline,5100,infovqa_val_anls,0.2546269175216946,0.007112814176935012
Baseline,5100,mme_total_score,1185.1023409363747,
Baseline,5100,mmmu_val_mmmu_acc,0.29222,
Baseline,5100,mmstar_average,0.33637966343646347,
Baseline,5100,ocrbench_ocrbench_accuracy,0.533,
Baseline,5100,textvqa_val_exact_match,0.51074,0.0068004249599511925
Baseline,6300,ai2d_exact_match,0.41515544041450775,0.00886864516657515
Baseline,6300,average,0.43890688312888254,
Baseline,6300,average_rank,1.4444444444444444,
Baseline,6300,chartqa_relaxed_overall,0.5388,0.0099718403035556
Baseline,6300,docvqa_val_anls,0.6024512173813115,0.006190216536053702
Baseline,6300,infovqa_val_anls,0.2548412895443468,0.007030638027408485
Baseline,6300,mme_total_score,1187.329431772709,
Baseline,6300,mmmu_val_mmmu_acc,0.30667,
Baseline,6300,mmstar_average,0.3500771176908943,
Baseline,6300,ocrbench_ocrbench_accuracy,0.516,
Baseline,6300,textvqa_val_exact_match,0.52726,0.006770298802059908
Baseline,7500,ai2d_exact_match,0.42972797927461137,0.008909832364541428
Baseline,7500,average,0.44878537461255386,
Baseline,7500,average_rank,1.3333333333333333,
Baseline,7500,chartqa_relaxed_overall,0.5728,0.009895414680177737
Baseline,7500,docvqa_val_anls,0.6164034078362094,0.006122657396260068
Baseline,7500,infovqa_val_anls,0.25244937386016403,0.006941949044716374
Baseline,7500,mme_total_score,1282.560024009604,
Baseline,7500,mmmu_val_mmmu_acc,0.29667,
Baseline,7500,mmstar_average,0.3339722359294459,
Baseline,7500,ocrbench_ocrbench_accuracy,0.558,
Baseline,7500,textvqa_val_exact_match,0.5302600000000001,0.0067524799649562395
Baseline,8700,ai2d_exact_match,0.44527202072538863,0.008945084019331404
Baseline,8700,average,0.4558942646480554,
Baseline,8700,average_rank,1.5555555555555556,
Baseline,8700,chartqa_relaxed_overall,0.5852,0.009855721084488851
Baseline,8700,docvqa_val_anls,0.6221835109907441,0.006147036255020746
Baseline,8700,infovqa_val_anls,0.25900127209441604,0.006885435292484948
Baseline,8700,mme_total_score,1182.047919167667,
Baseline,8700,mmmu_val_mmmu_acc,0.30333,
Baseline,8700,mmstar_average,0.3299073133738943,
Baseline,8700,ocrbench_ocrbench_accuracy,0.559,
Baseline,8700,textvqa_val_exact_match,0.54326,0.0067297527736521565
Baseline,9900,ai2d_exact_match,0.4520725388601036,0.008957715852675529
Baseline,9900,average,0.4655685311713072,
Baseline,9900,average_rank,1.5555555555555556,
Baseline,9900,chartqa_relaxed_overall,0.5888,0.009842996384797287
Baseline,9900,docvqa_val_anls,0.6443822232919176,0.006072644236356477
Baseline,9900,infovqa_val_anls,0.2707219279967856,0.007060292176646616
Baseline,9900,mme_total_score,1293.4631852741097,
Baseline,9900,mmmu_val_mmmu_acc,0.30444,
Baseline,9900,mmstar_average,0.34327155922165065,
Baseline,9900,ocrbench_ocrbench_accuracy,0.557,
Baseline,9900,textvqa_val_exact_match,0.56386,0.006703146016110842
Baseline,11100,ai2d_exact_match,0.4494818652849741,0.008953103134587198
Baseline,11100,average,0.471077301321738,
Baseline,11100,average_rank,1.6666666666666667,
Baseline,11100,chartqa_relaxed_overall,0.5948,0.009820578470976232
Baseline,11100,docvqa_val_anls,0.657973309294109,0.006015458191652746
Baseline,11100,infovqa_val_anls,0.29696232573726855,0.007574623301736419
Baseline,11100,mme_total_score,1338.3029211684673,
Baseline,11100,mmmu_val_mmmu_acc,0.29667,
Baseline,11100,mmstar_average,0.3394909102575524,
Baseline,11100,ocrbench_ocrbench_accuracy,0.565,
Baseline,11100,textvqa_val_exact_match,0.56824,0.006679879088496093
Baseline,12300,ai2d_exact_match,0.4676165803108808,0.008980259712600086
Baseline,12300,average,0.47342294699365395,
Baseline,12300,average_rank,1.5555555555555556,
Baseline,12300,chartqa_relaxed_overall,0.598,0.009808000752013664
Baseline,12300,docvqa_val_anls,0.6588847758219586,0.00602421968017162
Baseline,12300,infovqa_val_anls,0.2830975650419957,0.007216197962807829
Baseline,12300,mme_total_score,1269.7461984793918,
Baseline,12300,mmmu_val_mmmu_acc,0.28333,
Baseline,12300,mmstar_average,0.3693946547743964,
Baseline,12300,ocrbench_ocrbench_accuracy,0.559,
Baseline,12300,textvqa_val_exact_match,0.5680599999999999,0.006686980665598219
Baseline,13500,ai2d_exact_match,0.47085492227979275,0.008983852707691612
Baseline,13500,average,0.48226394524672617,
Baseline,13500,average_rank,1.5555555555555556,
Baseline,13500,chartqa_relaxed_overall,0.618,0.009719474639861454
Baseline,13500,docvqa_val_anls,0.6663692127257962,0.005978102603390597
Baseline,13500,infovqa_val_anls,0.32051341945189793,0.007779116582967409
Baseline,13500,mme_total_score,1202.768607442977,
Baseline,13500,mmmu_val_mmmu_acc,0.28,
Baseline,13500,mmstar_average,0.35477400751632243,
Baseline,13500,ocrbench_ocrbench_accuracy,0.569,
Baseline,13500,textvqa_val_exact_match,0.5785999999999999,0.006676145758177908
Baseline,14700,ai2d_exact_match,0.46567357512953367,0.008977921602780724
Baseline,14700,average,0.48621829332317545,
Baseline,14700,average_rank,1.5555555555555556,
Baseline,14700,chartqa_relaxed_overall,0.6296,0.0096601689190934
Baseline,14700,docvqa_val_anls,0.6810941724065047,0.005910647813959628
Baseline,14700,infovqa_val_anls,0.3016034504434661,0.007417514325399065
Baseline,14700,mme_total_score,1281.9612845138056,
Baseline,14700,mmmu_val_mmmu_acc,0.29778,
Baseline,14700,mmstar_average,0.365895148605899,
Baseline,14700,ocrbench_ocrbench_accuracy,0.562,
Baseline,14700,textvqa_val_exact_match,0.5861,0.006642001297519238
Baseline,15900,ai2d_exact_match,0.48186528497409326,0.008993233105757854
Baseline,15900,average,0.48999290982002447,
Baseline,15900,average_rank,1.5,
Baseline,15900,chartqa_relaxed_overall,0.64,0.009601920576192066
Baseline,15900,docvqa_val_anls,0.6858324657211811,0.00589619582327283
Baseline,15900,infovqa_val_anls,0.2913749730393032,0.007302812648430173
Baseline,15900,mme_total_score,1296.9955982392958,
Baseline,15900,mmmu_val_mmmu_acc,0.29111,
Baseline,15900,mmstar_average,0.35848055482561814,
Baseline,15900,ocrbench_ocrbench_accuracy,0.581,
Baseline,15900,textvqa_val_exact_match,0.59028,0.006635865524726405
Baseline,17100,ai2d_exact_match,0.4740932642487047,0.008987066275159845
Baseline,17100,average,0.4931189092163302,
Baseline,17100,average_rank,1.7777777777777777,
Baseline,17100,chartqa_relaxed_overall,0.644,0.009578219924326623
Baseline,17100,docvqa_val_anls,0.6847803896363295,0.005919128355709122
Baseline,17100,infovqa_val_anls,0.3018247984331409,0.007408081810180743
Baseline,17100,mme_total_score,1262.8012204881952,
Baseline,17100,mmmu_val_mmmu_acc,0.28444,
Baseline,17100,mmstar_average,0.36583282141246676,
Baseline,17100,ocrbench_ocrbench_accuracy,0.588,
Baseline,17100,textvqa_val_exact_match,0.6019800000000001,0.0065905009567234045
Baseline,18300,ai2d_exact_match,0.4876943005181347,0.008996428218289523
Baseline,18300,average,0.5004883767088391,
Baseline,18300,average_rank,1.5,
Baseline,18300,chartqa_relaxed_overall,0.652,0.00952862623294433
Baseline,18300,docvqa_val_anls,0.6975218894019752,0.005845051202995877
Baseline,18300,infovqa_val_anls,0.3185079040699619,0.007608667971660477
Baseline,18300,mme_total_score,1310.265706282513,
Baseline,18300,mmmu_val_mmmu_acc,0.29556,
Baseline,18300,mmstar_average,0.36108291968064027,
Baseline,18300,ocrbench_ocrbench_accuracy,0.588,
Baseline,18300,textvqa_val_exact_match,0.60354,0.006611280926348344
Baseline,19500,ai2d_exact_match,0.47765544041450775,0.00899016344465196
Baseline,19500,average,0.5040547762672563,
Baseline,19500,average_rank,1.4444444444444444,
Baseline,19500,chartqa_relaxed_overall,0.6552,0.009507962165354631
Baseline,19500,docvqa_val_anls,0.7041825239698998,0.005808767160221614
Baseline,19500,infovqa_val_anls,0.3209241432627218,0.007605560217474187
Baseline,19500,mme_total_score,1295.3964585834333,
Baseline,19500,mmmu_val_mmmu_acc,0.30333,
Baseline,19500,mmstar_average,0.35936610249092044,
Baseline,19500,ocrbench_ocrbench_accuracy,0.604,
Baseline,19500,textvqa_val_exact_match,0.60778,0.006595164407254131
Baseline,20700,ai2d_exact_match,0.49190414507772023,0.008997974381217105
Baseline,20700,average,0.5348651598748863,
Baseline,20700,average_rank,1.25,
Baseline,20700,chartqa_relaxed_overall,0.6472,0.009558734841217527
Baseline,20700,docvqa_val_anls,0.70377508713271,0.005815829966103309
Baseline,20700,infovqa_val_anls,0.31228879567103124,0.0074592773891107925
Baseline,20700,mme_total_score,1267.3561424569828,
Baseline,20700,mmstar_average,0.36086809124274183,
Baseline,20700,ocrbench_ocrbench_accuracy,0.605,
Baseline,20700,textvqa_val_exact_match,0.62302,0.006536647571369781
Baseline,21900,ai2d_exact_match,0.49125647668393785,0.008997778057794698
Baseline,21900,average,0.5035549318138456,
Baseline,21900,average_rank,1.4444444444444444,
Baseline,21900,chartqa_relaxed_overall,0.6556,0.009505345687488459
Baseline,21900,docvqa_val_anls,0.7044656227681543,0.005797355786446792
Baseline,21900,infovqa_val_anls,0.3214548388700204,0.007656455061893302
Baseline,21900,mme_total_score,1270.262104841937,
Baseline,21900,mmmu_val_mmmu_acc,0.28111,
Baseline,21900,mmstar_average,0.36167251618865237,
Baseline,21900,ocrbench_ocrbench_accuracy,0.597,
Baseline,21900,textvqa_val_exact_match,0.61588,0.006563701818052925
Baseline,23100,ai2d_exact_match,0.49319948186528495,0.008998321712163856
Baseline,23100,average,0.5385543058304301,
Baseline,23100,average_rank,1.5,
Baseline,23100,chartqa_relaxed_overall,0.6592,0.009481461028833927
Baseline,23100,docvqa_val_anls,0.7121972356483652,0.005769225218375019
Baseline,23100,infovqa_val_anls,0.31967136620122777,0.007611618366213475
Baseline,23100,mme_total_score,1318.2786114445778,
Baseline,23100,mmstar_average,0.3630320570981325,
Baseline,23100,ocrbench_ocrbench_accuracy,0.602,
Baseline,23100,textvqa_val_exact_match,0.62058,0.006524799408523169
Baseline,24300,ai2d_exact_match,0.49255181347150256,0.008998155599035915
Baseline,24300,average,0.5094308504545716,
Baseline,24300,average_rank,1.5555555555555556,
Baseline,24300,chartqa_relaxed_overall,0.6704,0.009403239035659185
Baseline,24300,docvqa_val_anls,0.7177853964151442,0.005720014481294498
Baseline,24300,infovqa_val_anls,0.31972012794378407,0.007606738233281323
Baseline,24300,mme_total_score,1306.592336934774,
Baseline,24300,mmmu_val_mmmu_acc,0.29778,
Baseline,24300,mmstar_average,0.37076946580614156,
Baseline,24300,ocrbench_ocrbench_accuracy,0.59,
Baseline,24300,textvqa_val_exact_match,0.6164400000000001,0.006543401905866729
Baseline,25500,ai2d_exact_match,0.501619170984456,0.008999106932714636
Baseline,25500,average,0.5486249165918439,
Baseline,25500,average_rank,1.625,
Baseline,25500,chartqa_relaxed_overall,0.6752,0.00936787525721462
Baseline,25500,docvqa_val_anls,0.7137288248520355,0.0057597420625403505
Baseline,25500,infovqa_val_anls,0.34135511904919924,0.0077802284678825705
Baseline,25500,mme_total_score,1323.6883753501402,
Baseline,25500,mmstar_average,0.369071301257217,
Baseline,25500,ocrbench_ocrbench_accuracy,0.619,
Baseline,25500,textvqa_val_exact_match,0.6204,0.00653548089294892
Baseline,26700,ai2d_exact_match,0.4990284974093264,0.008999137132137064
Baseline,26700,average,0.5171016246428288,
Baseline,26700,average_rank,1.4444444444444444,
Baseline,26700,chartqa_relaxed_overall,0.6712,0.009397422445513864
Baseline,26700,docvqa_val_anls,0.7233130041233962,0.005709000608468465
Baseline,26700,infovqa_val_anls,0.34093933218960265,0.007871398735359877
Baseline,26700,mme_total_score,1290.1798719487797,
Baseline,26700,mmmu_val_mmmu_acc,0.29889,
Baseline,26700,mmstar_average,0.3681821634203056,
Baseline,26700,ocrbench_ocrbench_accuracy,0.602,
Baseline,26700,textvqa_val_exact_match,0.63326,0.006491932186699375
Baseline,27900,ai2d_exact_match,0.49773316062176165,0.008999061633391479
Baseline,27900,average,0.5456332793229398,
Baseline,27900,average_rank,1.625,
Baseline,27900,chartqa_relaxed_overall,0.6756,0.009364877808842454
Baseline,27900,docvqa_val_anls,0.7132690678246167,0.00575358310740901
Baseline,27900,infovqa_val_anls,0.3362338249924974,0.007684149470716349
Baseline,27900,mme_total_score,1267.1172468987595,
Baseline,27900,mmstar_average,0.3725169018217032,
Baseline,27900,ocrbench_ocrbench_accuracy,0.599,
Baseline,27900,textvqa_val_exact_match,0.62508,0.006518059200340837
Baseline,29100,ai2d_exact_match,0.5019430051813472,0.008999086170553228
Baseline,29100,average,0.5238317316407767,
Baseline,29100,average_rank,1.0,
Baseline,29100,chartqa_relaxed_overall,0.6828,0.009309582768982347
Baseline,29100,docvqa_val_anls,0.7233823673869951,0.005705166797815572
Baseline,29100,infovqa_val_anls,0.34214735285161113,0.007759163899965965
Baseline,29100,mme_total_score,1321.8040216086433,
Baseline,29100,mmmu_val_mmmu_acc,0.31222,
Baseline,29100,mmstar_average,0.3709411277062599,
Baseline,29100,ocrbench_ocrbench_accuracy,0.622,
Baseline,29100,textvqa_val_exact_match,0.6352199999999999,0.00647159073314463
Baseline,30300,ai2d_exact_match,0.5055051813471503,0.008998608627616667
Baseline,30300,average,0.5497034826600226,
Baseline,30300,average_rank,1.375,
Baseline,30300,chartqa_relaxed_overall,0.6784,0.009343676884347384
Baseline,30300,docvqa_val_anls,0.7227075209990185,0.005720573311731873
Baseline,30300,infovqa_val_anls,0.33249900926543363,0.007751325884024483
Baseline,30300,mme_total_score,1290.3790516206482,
Baseline,30300,mmstar_average,0.36331266700855536,
Baseline,30300,ocrbench_ocrbench_accuracy,0.612,
Baseline,30300,textvqa_val_exact_match,0.6335,0.006488911402865572
Baseline,31500,ai2d_exact_match,0.4993523316062176,0.008999146569435543
Baseline,31500,average,0.5220721222554265,
Baseline,31500,average_rank,1.5555555555555556,
Baseline,31500,chartqa_relaxed_overall,0.6872,0.009274528060677767
Baseline,31500,docvqa_val_anls,0.732681296661989,0.005643494305560718
Baseline,31500,infovqa_val_anls,0.34453436089995576,0.007841367492503165
Baseline,31500,mme_total_score,1304.8996598639455,
Baseline,31500,mmmu_val_mmmu_acc,0.29444,
Baseline,31500,mmstar_average,0.37192898887525,
Baseline,31500,ocrbench_ocrbench_accuracy,0.61,
Baseline,31500,textvqa_val_exact_match,0.63644,0.006473052244580776
Baseline,32700,ai2d_exact_match,0.49870466321243523,0.00899912391990207
Baseline,32700,average,0.5546837276191249,
Baseline,32700,average_rank,1.5,
Baseline,32700,chartqa_relaxed_overall,0.68,0.009331389496316869
Baseline,32700,docvqa_val_anls,0.7278962076951819,0.005686137433507678
Baseline,32700,infovqa_val_anls,0.3359004823603636,0.007743137801806592
Baseline,32700,mme_total_score,1329.2223889555821,
Baseline,32700,mmstar_average,0.3761847400658931,
Baseline,32700,ocrbench_ocrbench_accuracy,0.626,
Baseline,32700,textvqa_val_exact_match,0.6381000000000001,0.006469625121275727
Baseline,33900,ai2d_exact_match,0.5019430051813472,0.00899908617055323
Baseline,33900,average,0.5185104134885045,
Baseline,33900,average_rank,1.5555555555555556,
Baseline,33900,chartqa_relaxed_overall,0.6784,0.009343676884347384
Baseline,33900,docvqa_val_anls,0.7328401883203162,0.005641229328683336
Baseline,33900,infovqa_val_anls,0.33727943427582574,0.0077500601420040695
Baseline,33900,mme_total_score,1330.3196278511405,
Baseline,33900,mmmu_val_mmmu_acc,0.28,
Baseline,33900,mmstar_average,0.3640006801305467,
Baseline,33900,ocrbench_ocrbench_accuracy,0.617,
Baseline,33900,textvqa_val_exact_match,0.63662,0.006467562214018388
Baseline,35100,ai2d_exact_match,0.5029145077720207,0.008999001233939133
Baseline,35100,average,0.5522905800868071,
Baseline,35100,average_rank,1.625,
Baseline,35100,chartqa_relaxed_overall,0.68,0.009331389496316869
Baseline,35100,docvqa_val_anls,0.7269648828481717,0.005683622810231662
Baseline,35100,infovqa_val_anls,0.33846207838337145,0.00774681529996113
Baseline,35100,mme_total_score,1299.1129451780712,
Baseline,35100,mmstar_average,0.36183259160408615,
Baseline,35100,ocrbench_ocrbench_accuracy,0.616,
Baseline,35100,textvqa_val_exact_match,0.63986,0.0064564830453322595
Baseline,36300,ai2d_exact_match,0.501619170984456,0.008999106932714636
Baseline,36300,average,0.5203510175588769,
Baseline,36300,average_rank,1.4444444444444444,
Baseline,36300,chartqa_relaxed_overall,0.6808,0.009325198535746702
Baseline,36300,docvqa_val_anls,0.7270212281583848,0.0056833541878296414
Baseline,36300,infovqa_val_anls,0.3340392024865933,0.007611756166885497
Baseline,36300,mme_total_score,1280.1442577030812,
Baseline,36300,mmmu_val_mmmu_acc,0.30111,
Baseline,36300,mmstar_average,0.36247853884158143,
Baseline,36300,ocrbench_ocrbench_accuracy,0.615,
Baseline,36300,textvqa_val_exact_match,0.64074,0.0064493076522863105
Baseline,37500,ai2d_exact_match,0.5074481865284974,0.008998155599035891
Baseline,37500,average,0.5599086924183005,
Baseline,37500,average_rank,1.25,
Baseline,37500,chartqa_relaxed_overall,0.69,0.009251715392027472
Baseline,37500,docvqa_val_anls,0.7338638293909314,0.005628628195159443
Baseline,37500,infovqa_val_anls,0.35075945776545553,0.007880392253956911
Baseline,37500,mme_total_score,1308.0833333333333,
Baseline,37500,mmstar_average,0.37624937324321944,
Baseline,37500,ocrbench_ocrbench_accuracy,0.622,
Baseline,37500,textvqa_val_exact_match,0.63904,0.006478670412520058
Baseline,38700,ai2d_exact_match,0.5,0.008999154119267315
Baseline,38700,average,0.5225140432328732,
Baseline,38700,average_rank,1.5555555555555556,
Baseline,38700,chartqa_relaxed_overall,0.6832,0.009306435832216308
Baseline,38700,docvqa_val_anls,0.73088808708227,0.00563114482117092
Baseline,38700,infovqa_val_anls,0.3478216232204623,0.00789714223139076
Baseline,38700,mme_total_score,1277.5526210484195,
Baseline,38700,mmmu_val_mmmu_acc,0.28667,
Baseline,38700,mmstar_average,0.3681926355602532,
Baseline,38700,ocrbench_ocrbench_accuracy,0.624,
Baseline,38700,textvqa_val_exact_match,0.6393399999999999,0.00647079957419683
Baseline,39900,ai2d_exact_match,0.5058290155440415,0.008998542562369288
Baseline,39900,average,0.5567573845010034,
Baseline,39900,average_rank,1.375,
Baseline,39900,chartqa_relaxed_overall,0.6788,0.00934061683451043
Baseline,39900,docvqa_val_anls,0.7307115103048833,0.005666517404544185
Baseline,39900,infovqa_val_anls,0.3519024541637205,0.007911172051974351
Baseline,39900,mme_total_score,1294.3033213285314,
Baseline,39900,mmstar_average,0.36969871149437833,
Baseline,39900,ocrbench_ocrbench_accuracy,0.619,
Baseline,39900,textvqa_val_exact_match,0.6413599999999999,0.006448549204074314
Internal Deduplication,300,ai2d_exact_match,0.2503238341968912,0.007796858242572104
Internal Deduplication,300,average,0.19412722789194248,
Internal Deduplication,300,average_rank,1.5555555555555556,
Internal Deduplication,300,chartqa_relaxed_overall,0.1412,0.0069659481604092775
Internal Deduplication,300,docvqa_val_anls,0.15637861297756628,0.004267695603476823
Internal Deduplication,300,infovqa_val_anls,0.1042887841127396,0.005046536381262501
Internal Deduplication,300,mme_total_score,598.6149459783913,
Internal Deduplication,300,mmmu_val_mmmu_acc,0.26556,
Internal Deduplication,300,mmstar_average,0.2694265918483427,
Internal Deduplication,300,ocrbench_ocrbench_accuracy,0.167,
Internal Deduplication,300,textvqa_val_exact_match,0.19884000000000002,0.005492264002465154
Internal Deduplication,1500,ai2d_exact_match,0.27299222797927464,0.008018190192865413
Internal Deduplication,1500,average,0.31955460499150806,
Internal Deduplication,1500,average_rank,1.7777777777777777,
Internal Deduplication,1500,chartqa_relaxed_overall,0.3708,0.00966231277258432
Internal Deduplication,1500,docvqa_val_anls,0.42768709568231533,0.006154040400291129
Internal Deduplication,1500,infovqa_val_anls,0.2099303690224102,0.00676857279363082
Internal Deduplication,1500,mme_total_score,992.9132653061225,
Internal Deduplication,1500,mmmu_val_mmmu_acc,0.26889,
Internal Deduplication,1500,mmstar_average,0.21057714724806412,
Internal Deduplication,1500,ocrbench_ocrbench_accuracy,0.404,
Internal Deduplication,1500,textvqa_val_exact_match,0.39155999999999996,0.006665511164780805
Internal Deduplication,2700,ai2d_exact_match,0.295660621761658,0.008213332656949247
Internal Deduplication,2700,average,0.36762151428382045,
Internal Deduplication,2700,average_rank,1.5555555555555556,
Internal Deduplication,2700,chartqa_relaxed_overall,0.4752,0.009989689762981844
Internal Deduplication,2700,docvqa_val_anls,0.5094800317043119,0.006254649346492251
Internal Deduplication,2700,infovqa_val_anls,0.20719401979989327,0.006520807933324386
Internal Deduplication,2700,mme_total_score,1071.3925570228091,
Internal Deduplication,2700,mmmu_val_mmmu_acc,0.27,
Internal Deduplication,2700,mmstar_average,0.2397774410047003,
Internal Deduplication,2700,ocrbench_ocrbench_accuracy,0.494,
Internal Deduplication,2700,textvqa_val_exact_match,0.44965999999999995,0.006770608917152268
Internal Deduplication,3900,ai2d_exact_match,0.35751295336787564,0.008626006165018857
Internal Deduplication,3900,average,0.40092708598125315,
Internal Deduplication,3900,average_rank,1.5555555555555556,
Internal Deduplication,3900,chartqa_relaxed_overall,0.5108,0.009999667061284322
Internal Deduplication,3900,docvqa_val_anls,0.5404721998847206,0.0062378368939630035
Internal Deduplication,3900,infovqa_val_anls,0.22349780573998537,0.006643570027298634
Internal Deduplication,3900,mme_total_score,1134.516706682673,
Internal Deduplication,3900,mmmu_val_mmmu_acc,0.29111,
Internal Deduplication,3900,mmstar_average,0.27976372885744333,
Internal Deduplication,3900,ocrbench_ocrbench_accuracy,0.51,
Internal Deduplication,3900,textvqa_val_exact_match,0.49426000000000003,0.006797576913163843
Internal Deduplication,5100,ai2d_exact_match,0.38827720207253885,0.008771623130477878
Internal Deduplication,5100,average,0.4219485735226934,
Internal Deduplication,5100,average_rank,1.7777777777777777,
Internal Deduplication,5100,chartqa_relaxed_overall,0.5236,0.009990852959439592
Internal Deduplication,5100,docvqa_val_anls,0.5747949496010799,0.006245322873999332
Internal Deduplication,5100,infovqa_val_anls,0.2283558074433608,0.006643505571541433
Internal Deduplication,5100,mme_total_score,1120.3775510204082,
Internal Deduplication,5100,mmmu_val_mmmu_acc,0.27444,
Internal Deduplication,5100,mmstar_average,0.32262062906456745,
Internal Deduplication,5100,ocrbench_ocrbench_accuracy,0.546,
Internal Deduplication,5100,textvqa_val_exact_match,0.5175,0.006791610648074506
Internal Deduplication,6300,ai2d_exact_match,0.3947538860103627,0.008797532848529212
Internal Deduplication,6300,average,0.4392913905300591,
Internal Deduplication,6300,average_rank,1.5555555555555556,
Internal Deduplication,6300,chartqa_relaxed_overall,0.554,0.009943497838271193
Internal Deduplication,6300,docvqa_val_anls,0.6054354573141266,0.006148692369883667
Internal Deduplication,6300,infovqa_val_anls,0.2479668172159887,0.006849066135124891
Internal Deduplication,6300,mme_total_score,1120.747699079632,
Internal Deduplication,6300,mmmu_val_mmmu_acc,0.28222,
Internal Deduplication,6300,mmstar_average,0.33081496369999497,
Internal Deduplication,6300,ocrbench_ocrbench_accuracy,0.562,
Internal Deduplication,6300,textvqa_val_exact_match,0.53714,0.00675218797787041
Internal Deduplication,7500,ai2d_exact_match,0.4368523316062176,0.008927095061184939
Internal Deduplication,7500,average,0.4484625925841701,
Internal Deduplication,7500,average_rank,1.6666666666666667,
Internal Deduplication,7500,chartqa_relaxed_overall,0.5716,0.009898917689756362
Internal Deduplication,7500,docvqa_val_anls,0.6158904129878224,0.006156668221029065
Internal Deduplication,7500,infovqa_val_anls,0.2491041330885082,0.006950914810318631
Internal Deduplication,7500,mme_total_score,1182.0997398959585,
Internal Deduplication,7500,mmmu_val_mmmu_acc,0.30222,
Internal Deduplication,7500,mmstar_average,0.3126938629908125,
Internal Deduplication,7500,ocrbench_ocrbench_accuracy,0.554,
Internal Deduplication,7500,textvqa_val_exact_match,0.5453399999999999,0.006743052026354684
Internal Deduplication,8700,ai2d_exact_match,0.43555699481865284,0.008924095913829722
Internal Deduplication,8700,average,0.4610890710492869,
Internal Deduplication,8700,average_rank,1.4444444444444444,
Internal Deduplication,8700,chartqa_relaxed_overall,0.5856,0.009854334029231191
Internal Deduplication,8700,docvqa_val_anls,0.6337792662388687,0.006121292484093459
Internal Deduplication,8700,infovqa_val_anls,0.3014589775424448,0.007723778532370607
Internal Deduplication,8700,mme_total_score,1146.702080832333,
Internal Deduplication,8700,mmmu_val_mmmu_acc,0.28111,
Internal Deduplication,8700,mmstar_average,0.34138732979432873,
Internal Deduplication,8700,ocrbench_ocrbench_accuracy,0.554,
Internal Deduplication,8700,textvqa_val_exact_match,0.5558200000000001,0.006722310868494742
Internal Deduplication,9900,ai2d_exact_match,0.4530440414507772,0.008959382447335284
Internal Deduplication,9900,average,0.4640919637505932,
Internal Deduplication,9900,average_rank,1.4444444444444444,
Internal Deduplication,9900,chartqa_relaxed_overall,0.596,0.009815912634917984
Internal Deduplication,9900,docvqa_val_anls,0.6449581300442709,0.006031449307242489
Internal Deduplication,9900,infovqa_val_anls,0.2651241729320676,0.007027677036596941
Internal Deduplication,9900,mme_total_score,1198.2277911164465,
Internal Deduplication,9900,mmmu_val_mmmu_acc,0.28,
Internal Deduplication,9900,mmstar_average,0.33564936557763,
Internal Deduplication,9900,ocrbench_ocrbench_accuracy,0.571,
Internal Deduplication,9900,textvqa_val_exact_match,0.5669599999999999,0.0067004067615447065
Internal Deduplication,11100,ai2d_exact_match,0.4566062176165803,0.008965198879336196
Internal Deduplication,11100,average,0.4745786301209996,
Internal Deduplication,11100,average_rank,1.3333333333333333,
Internal Deduplication,11100,chartqa_relaxed_overall,0.608,0.00976588700628918
Internal Deduplication,11100,docvqa_val_anls,0.6596743239996393,0.005996833864420919
Internal Deduplication,11100,infovqa_val_anls,0.30142039609988674,0.0075421730872732295
Internal Deduplication,11100,mme_total_score,1136.5589235694279,
Internal Deduplication,11100,mmmu_val_mmmu_acc,0.29,
Internal Deduplication,11100,mmstar_average,0.32532810325189065,
Internal Deduplication,11100,ocrbench_ocrbench_accuracy,0.586,
Internal Deduplication,11100,textvqa_val_exact_match,0.5696,0.00669753233570974
Internal Deduplication,12300,ai2d_exact_match,0.47085492227979275,0.0089838527076916
Internal Deduplication,12300,average,0.47675266119609205,
Internal Deduplication,12300,average_rank,1.4444444444444444,
Internal Deduplication,12300,chartqa_relaxed_overall,0.6024,0.009789996609470577
Internal Deduplication,12300,docvqa_val_anls,0.6541921314490913,0.0059901948837693935
Internal Deduplication,12300,infovqa_val_anls,0.26890492643687214,0.0068929334847927185
Internal Deduplication,12300,mme_total_score,1180.1697679071628,
Internal Deduplication,12300,mmmu_val_mmmu_acc,0.30111,
Internal Deduplication,12300,mmstar_average,0.3420593094029801,
Internal Deduplication,12300,ocrbench_ocrbench_accuracy,0.588,
Internal Deduplication,12300,textvqa_val_exact_match,0.5865000000000001,0.006650353031162167
Internal Deduplication,13500,ai2d_exact_match,0.4689119170984456,0.008981742470016596
Internal Deduplication,13500,average,0.477194042186954,
Internal Deduplication,13500,average_rank,1.4444444444444444,
Internal Deduplication,13500,chartqa_relaxed_overall,0.6076,0.009767653701044555
Internal Deduplication,13500,docvqa_val_anls,0.6669529256090054,0.005964340335624923
Internal Deduplication,13500,infovqa_val_anls,0.28048200541677026,0.00715533754622952
Internal Deduplication,13500,mme_total_score,1205.548119247699,
Internal Deduplication,13500,mmmu_val_mmmu_acc,0.28556,
Internal Deduplication,13500,mmstar_average,0.3358454893714108,
Internal Deduplication,13500,ocrbench_ocrbench_accuracy,0.589,
Internal Deduplication,13500,textvqa_val_exact_match,0.5832,0.006654352566675162
Internal Deduplication,14700,ai2d_exact_match,0.47733160621761656,0.008989900821900263
Internal Deduplication,14700,average,0.4884023663438535,
Internal Deduplication,14700,average_rank,1.4444444444444444,
Internal Deduplication,14700,chartqa_relaxed_overall,0.6304,0.009655859891905061
Internal Deduplication,14700,docvqa_val_anls,0.6801802838124448,0.005922660123416213
Internal Deduplication,14700,infovqa_val_anls,0.306442807638199,0.007585813874676366
Internal Deduplication,14700,mme_total_score,1141.5065026010404,
Internal Deduplication,14700,mmmu_val_mmmu_acc,0.28556,
Internal Deduplication,14700,mmstar_average,0.3313042330825678,
Internal Deduplication,14700,ocrbench_ocrbench_accuracy,0.601,
Internal Deduplication,14700,textvqa_val_exact_match,0.595,0.006618682753560443
Internal Deduplication,15900,ai2d_exact_match,0.48737046632124353,0.0089962828388782
Internal Deduplication,15900,average,0.5203517701538484,
Internal Deduplication,15900,average_rank,1.5,
Internal Deduplication,15900,chartqa_relaxed_overall,0.6268,0.009675026948726469
Internal Deduplication,15900,docvqa_val_anls,0.6832159326200654,0.005900840845629961
Internal Deduplication,15900,infovqa_val_anls,0.3152545751330662,0.007651477632904633
Internal Deduplication,15900,mme_total_score,1225.4948979591836,
Internal Deduplication,15900,mmstar_average,0.32764141700256333,
Internal Deduplication,15900,ocrbench_ocrbench_accuracy,0.603,
Internal Deduplication,15900,textvqa_val_exact_match,0.5991799999999999,0.006605224547149299
Internal Deduplication,17100,ai2d_exact_match,0.47636010362694303,0.008989090232793597
Internal Deduplication,17100,average,0.4961663419392575,
Internal Deduplication,17100,average_rank,1.2222222222222223,
Internal Deduplication,17100,chartqa_relaxed_overall,0.6464,0.009563650001989001
Internal Deduplication,17100,docvqa_val_anls,0.6927261914773173,0.005861047908265113
Internal Deduplication,17100,infovqa_val_anls,0.3154358494585615,0.00763456160506387
Internal Deduplication,17100,mme_total_score,1286.2750100040016,
Internal Deduplication,17100,mmmu_val_mmmu_acc,0.29889,
Internal Deduplication,17100,mmstar_average,0.34921859095123836,
Internal Deduplication,17100,ocrbench_ocrbench_accuracy,0.587,
Internal Deduplication,17100,textvqa_val_exact_match,0.6033,0.006602767700613255
Internal Deduplication,18300,ai2d_exact_match,0.4786269430051813,0.008990928596702264
Internal Deduplication,18300,average,0.5266473503807093,
Internal Deduplication,18300,average_rank,1.5,
Internal Deduplication,18300,chartqa_relaxed_overall,0.6552,0.009507962165354631
Internal Deduplication,18300,docvqa_val_anls,0.6989798369115747,0.00583327960847754
Internal Deduplication,18300,infovqa_val_anls,0.31662733272229215,0.00758318378302427
Internal Deduplication,18300,mme_total_score,1217.9891956782712,
Internal Deduplication,18300,mmstar_average,0.3360973400259174,
Internal Deduplication,18300,ocrbench_ocrbench_accuracy,0.595,
Internal Deduplication,18300,textvqa_val_exact_match,0.6060000000000001,0.006592108249887561
Internal Deduplication,19500,ai2d_exact_match,0.4896373056994819,0.008997221155546277
Internal Deduplication,19500,average,0.5003413312777834,
Internal Deduplication,19500,average_rank,1.5555555555555556,
Internal Deduplication,19500,chartqa_relaxed_overall,0.6508,0.009536252935404934
Internal Deduplication,19500,docvqa_val_anls,0.7013552478733074,0.005824977752328648
Internal Deduplication,19500,infovqa_val_anls,0.32620790060169225,0.007764453086996403
Internal Deduplication,19500,mme_total_score,1299.4400760304122,
Internal Deduplication,19500,mmmu_val_mmmu_acc,0.29556,
Internal Deduplication,19500,mmstar_average,0.3368301960477849,
Internal Deduplication,19500,ocrbench_ocrbench_accuracy,0.593,
Internal Deduplication,19500,textvqa_val_exact_match,0.60934,0.006559905437723197
Internal Deduplication,20700,ai2d_exact_match,0.4889896373056995,0.008996971954224612
Internal Deduplication,20700,average,0.5296276786578733,
Internal Deduplication,20700,average_rank,1.75,
Internal Deduplication,20700,chartqa_relaxed_overall,0.6444,0.009575809858898698
Internal Deduplication,20700,docvqa_val_anls,0.6989112987356239,0.00585808944665685
Internal Deduplication,20700,infovqa_val_anls,0.3158264619814475,0.007568423570507376
Internal Deduplication,20700,mme_total_score,1174.7768107242898,
Internal Deduplication,20700,mmstar_average,0.33400635258234235,
Internal Deduplication,20700,ocrbench_ocrbench_accuracy,0.614,
Internal Deduplication,20700,textvqa_val_exact_match,0.6112599999999999,0.0065589363778955695
Internal Deduplication,21900,ai2d_exact_match,0.4957901554404145,0.008998835133354702
Internal Deduplication,21900,average,0.5035083877228906,
Internal Deduplication,21900,average_rank,1.5555555555555556,
Internal Deduplication,21900,chartqa_relaxed_overall,0.64,0.009601920576192066
Internal Deduplication,21900,docvqa_val_anls,0.7037412472922321,0.005813532329025727
Internal Deduplication,21900,infovqa_val_anls,0.3194560697014221,0.007649647661031666
Internal Deduplication,21900,mme_total_score,1199.6734693877552,
Internal Deduplication,21900,mmmu_val_mmmu_acc,0.30889,
Internal Deduplication,21900,mmstar_average,0.33692962934905674,
Internal Deduplication,21900,ocrbench_ocrbench_accuracy,0.603,
Internal Deduplication,21900,textvqa_val_exact_match,0.6202599999999999,0.006539392877923941
Internal Deduplication,23100,ai2d_exact_match,0.4944948186528497,0.008998608627616672
Internal Deduplication,23100,average,0.5413853458503779,
Internal Deduplication,23100,average_rank,1.5,
Internal Deduplication,23100,chartqa_relaxed_overall,0.646,0.009566096595876119
Internal Deduplication,23100,docvqa_val_anls,0.7101587999220607,0.005806193919644477
Internal Deduplication,23100,infovqa_val_anls,0.336754873549068,0.007886540099947482
Internal Deduplication,23100,mme_total_score,1316.6187474989997,
Internal Deduplication,23100,mmstar_average,0.3476289288286667,
Internal Deduplication,23100,ocrbench_ocrbench_accuracy,0.627,
Internal Deduplication,23100,textvqa_val_exact_match,0.62766,0.006520482207447814
Internal Deduplication,24300,ai2d_exact_match,0.4899611398963731,0.008997340090107673
Internal Deduplication,24300,average,0.5100750686661266,
Internal Deduplication,24300,average_rank,1.4444444444444444,
Internal Deduplication,24300,chartqa_relaxed_overall,0.6516,0.009531175862679805
Internal Deduplication,24300,docvqa_val_anls,0.7179021844889384,0.005742973360829408
Internal Deduplication,24300,infovqa_val_anls,0.3358758923979091,0.007878017215252312
Internal Deduplication,24300,mme_total_score,1409.844237695078,
Internal Deduplication,24300,mmmu_val_mmmu_acc,0.28556,
Internal Deduplication,24300,mmstar_average,0.3347613325457924,
Internal Deduplication,24300,ocrbench_ocrbench_accuracy,0.634,
Internal Deduplication,24300,textvqa_val_exact_match,0.63094,0.006498229657201687
Internal Deduplication,25500,ai2d_exact_match,0.48607512953367876,0.008995663534025174
Internal Deduplication,25500,average,0.5472398215745332,
Internal Deduplication,25500,average_rank,1.375,
Internal Deduplication,25500,chartqa_relaxed_overall,0.6536,0.0095183536193109
Internal Deduplication,25500,docvqa_val_anls,0.7180940785000507,0.005735169057784404
Internal Deduplication,25500,infovqa_val_anls,0.35632636677863483,0.008180298439903802
Internal Deduplication,25500,mme_total_score,1376.716986794718,
Internal Deduplication,25500,mmstar_average,0.3529231762093682,
Internal Deduplication,25500,ocrbench_ocrbench_accuracy,0.633,
Internal Deduplication,25500,textvqa_val_exact_match,0.63066,0.006504156647155582
Internal Deduplication,26700,ai2d_exact_match,0.49255181347150256,0.008998155599035912
Internal Deduplication,26700,average,0.516487110189266,
Internal Deduplication,26700,average_rank,1.5555555555555556,
Internal Deduplication,26700,chartqa_relaxed_overall,0.6644,0.009445885130487209
Internal Deduplication,26700,docvqa_val_anls,0.7168133343849862,0.005756579734549226
Internal Deduplication,26700,infovqa_val_anls,0.34371436472133005,0.008017561696940439
Internal Deduplication,26700,mme_total_score,1409.4487795118048,
Internal Deduplication,26700,mmmu_val_mmmu_acc,0.30222,
Internal Deduplication,26700,mmstar_average,0.35023736893630925,
Internal Deduplication,26700,ocrbench_ocrbench_accuracy,0.63,
Internal Deduplication,26700,textvqa_val_exact_match,0.6319600000000001,0.006495302107669356
Internal Deduplication,27900,ai2d_exact_match,0.4954663212435233,0.008998784170060767
Internal Deduplication,27900,average,0.5488694312151498,
Internal Deduplication,27900,average_rank,1.375,
Internal Deduplication,27900,chartqa_relaxed_overall,0.6736,0.009379787213112317
Internal Deduplication,27900,docvqa_val_anls,0.7224633461958828,0.005716176978314635
Internal Deduplication,27900,infovqa_val_anls,0.35413809221269893,0.00811649922857756
Internal Deduplication,27900,mme_total_score,1365.8970588235293,
Internal Deduplication,27900,mmstar_average,0.33847825885394267,
Internal Deduplication,27900,ocrbench_ocrbench_accuracy,0.623,
Internal Deduplication,27900,textvqa_val_exact_match,0.6349400000000001,0.006474057612069333
Internal Deduplication,29100,ai2d_exact_match,0.4957901554404145,0.008998835133354704
Internal Deduplication,29100,average,0.5113797484193323,
Internal Deduplication,29100,average_rank,2.0,
Internal Deduplication,29100,chartqa_relaxed_overall,0.6604,0.009473364442136777
Internal Deduplication,29100,docvqa_val_anls,0.716657704725735,0.005756925555640175
Internal Deduplication,29100,infovqa_val_anls,0.3372271343716428,0.007828634509891694
Internal Deduplication,29100,mme_total_score,1300.1049419767908,
Internal Deduplication,29100,mmmu_val_mmmu_acc,0.29556,
Internal Deduplication,29100,mmstar_average,0.33882299281686595,
Internal Deduplication,29100,ocrbench_ocrbench_accuracy,0.613,
Internal Deduplication,29100,textvqa_val_exact_match,0.6335799999999999,0.006486361946288509
Internal Deduplication,30300,ai2d_exact_match,0.49676165803108807,0.008998965371572352
Internal Deduplication,30300,average,0.5468368131516261,
Internal Deduplication,30300,average_rank,1.625,
Internal Deduplication,30300,chartqa_relaxed_overall,0.6608,0.009470650520873179
Internal Deduplication,30300,docvqa_val_anls,0.7208981382284003,0.005745692168242118
Internal Deduplication,30300,infovqa_val_anls,0.33146012551516996,0.007795838114372819
Internal Deduplication,30300,mme_total_score,1330.1678671468587,
Internal Deduplication,30300,mmstar_average,0.35709777028672485,
Internal Deduplication,30300,ocrbench_ocrbench_accuracy,0.622,
Internal Deduplication,30300,textvqa_val_exact_match,0.6388400000000001,0.006462092742178937
Internal Deduplication,31500,ai2d_exact_match,0.4996761658031088,0.008999152231809677
Internal Deduplication,31500,average,0.5161255997108974,
Internal Deduplication,31500,average_rank,1.4444444444444444,
Internal Deduplication,31500,chartqa_relaxed_overall,0.6624,0.009459719367730022
Internal Deduplication,31500,docvqa_val_anls,0.7248827916963386,0.005715267948257416
Internal Deduplication,31500,infovqa_val_anls,0.3462785194206036,0.007940616340604684
Internal Deduplication,31500,mme_total_score,1388.7246898759504,
Internal Deduplication,31500,mmmu_val_mmmu_acc,0.28556,
Internal Deduplication,31500,mmstar_average,0.34634732076712815,
Internal Deduplication,31500,ocrbench_ocrbench_accuracy,0.622,
Internal Deduplication,31500,textvqa_val_exact_match,0.64186,0.006449237676913657
Internal Deduplication,32700,ai2d_exact_match,0.4957901554404145,0.008998835133354704
Internal Deduplication,32700,average,0.5500475012134611,
Internal Deduplication,32700,average_rank,1.5,
Internal Deduplication,32700,chartqa_relaxed_overall,0.6688,0.009414779829167153
Internal Deduplication,32700,docvqa_val_anls,0.7263156273407247,0.00570514646941267
Internal Deduplication,32700,infovqa_val_anls,0.3489756877198793,0.00798640336179305
Internal Deduplication,32700,mme_total_score,1362.764905962385,
Internal Deduplication,32700,mmstar_average,0.3385910379932094,
Internal Deduplication,32700,ocrbench_ocrbench_accuracy,0.63,
Internal Deduplication,32700,textvqa_val_exact_match,0.64186,0.006452586710386076
Internal Deduplication,33900,ai2d_exact_match,0.4957901554404145,0.008998835133354704
Internal Deduplication,33900,average,0.5160312203077811,
Internal Deduplication,33900,average_rank,1.4444444444444444,
Internal Deduplication,33900,chartqa_relaxed_overall,0.674,0.009376820884924869
Internal Deduplication,33900,docvqa_val_anls,0.7257174511919398,0.005702388110070895
Internal Deduplication,33900,infovqa_val_anls,0.3422539948680319,0.007936425119162906
Internal Deduplication,33900,mme_total_score,1389.4628851540615,
Internal Deduplication,33900,mmmu_val_mmmu_acc,0.28444,
Internal Deduplication,33900,mmstar_average,0.34272816096186326,
Internal Deduplication,33900,ocrbench_ocrbench_accuracy,0.619,
Internal Deduplication,33900,textvqa_val_exact_match,0.64432,0.0064359794815068575
Internal Deduplication,35100,ai2d_exact_match,0.49838082901554404,0.008999106932714645
Internal Deduplication,35100,average,0.5533101842015907,
Internal Deduplication,35100,average_rank,1.375,
Internal Deduplication,35100,chartqa_relaxed_overall,0.6736,0.009379787213112317
Internal Deduplication,35100,docvqa_val_anls,0.7278181728761878,0.005688301164010059
Internal Deduplication,35100,infovqa_val_anls,0.351201318391893,0.008119188634171728
Internal Deduplication,35100,mme_total_score,1411.3839535814327,
Internal Deduplication,35100,mmstar_average,0.34205096912751043,
Internal Deduplication,35100,ocrbench_ocrbench_accuracy,0.634,
Internal Deduplication,35100,textvqa_val_exact_match,0.64612,0.006431209933771596
Internal Deduplication,36300,ai2d_exact_match,0.49805699481865284,0.00899908617055324
Internal Deduplication,36300,average,0.5195231205481649,
Internal Deduplication,36300,average_rank,1.5555555555555556,
Internal Deduplication,36300,chartqa_relaxed_overall,0.672,0.009391574983583366
Internal Deduplication,36300,docvqa_val_anls,0.730916270863908,0.005660120362847363
Internal Deduplication,36300,infovqa_val_anls,0.3412406587672079,0.007911958522422949
Internal Deduplication,36300,mme_total_score,1367.637254901961,
Internal Deduplication,36300,mmmu_val_mmmu_acc,0.29444,
Internal Deduplication,36300,mmstar_average,0.34529103993555027,
Internal Deduplication,36300,ocrbench_ocrbench_accuracy,0.634,
Internal Deduplication,36300,textvqa_val_exact_match,0.6402399999999999,0.006461617365628822
Internal Deduplication,37500,ai2d_exact_match,0.5019430051813472,0.008999086170553233
Internal Deduplication,37500,average,0.5495836143474903,
Internal Deduplication,37500,average_rank,1.75,
Internal Deduplication,37500,chartqa_relaxed_overall,0.6756,0.009364877808842454
Internal Deduplication,37500,docvqa_val_anls,0.7255309514873474,0.005687086085909167
Internal Deduplication,37500,infovqa_val_anls,0.3366534174444908,0.007850461211973954
Internal Deduplication,37500,mme_total_score,1364.8713485394157,
Internal Deduplication,37500,mmstar_average,0.3467179263192468,
Internal Deduplication,37500,ocrbench_ocrbench_accuracy,0.618,
Internal Deduplication,37500,textvqa_val_exact_match,0.64264,0.0064540760066348676
Internal Deduplication,38700,ai2d_exact_match,0.49708549222797926,0.008999001233939138
Internal Deduplication,38700,average,0.5196671356527304,
Internal Deduplication,38700,average_rank,1.4444444444444444,
Internal Deduplication,38700,chartqa_relaxed_overall,0.6744,0.009373846787815587
Internal Deduplication,38700,docvqa_val_anls,0.732080533728902,0.0056514543481841085
Internal Deduplication,38700,infovqa_val_anls,0.34326469229313616,0.0079487702679686
Internal Deduplication,38700,mme_total_score,1366.760604241697,
Internal Deduplication,38700,mmmu_val_mmmu_acc,0.28778,
Internal Deduplication,38700,mmstar_average,0.34458636697182526,
Internal Deduplication,38700,ocrbench_ocrbench_accuracy,0.632,
Internal Deduplication,38700,textvqa_val_exact_match,0.6461399999999999,0.00642093963319658
Internal Deduplication,39900,ai2d_exact_match,0.4957901554404145,0.008998835133354702
Internal Deduplication,39900,average,0.5516529838475074,
Internal Deduplication,39900,average_rank,1.625,
Internal Deduplication,39900,chartqa_relaxed_overall,0.6696,0.009409024811273465
Internal Deduplication,39900,docvqa_val_anls,0.723701988394961,0.005721818793341698
Internal Deduplication,39900,infovqa_val_anls,0.3483904533235705,0.007951328084102772
Internal Deduplication,39900,mme_total_score,1403.717386954782,
Internal Deduplication,39900,mmstar_average,0.34950828977360593,
Internal Deduplication,39900,ocrbench_ocrbench_accuracy,0.629,
Internal Deduplication,39900,textvqa_val_exact_match,0.64558,0.006428340177019748