nicholasKluge commited on
Commit
488b787
·
verified ·
1 Parent(s): afa39cc

Upload evals_for_comparison.csv with huggingface_hub

Browse files
Files changed (1) hide show
  1. evals_for_comparison.csv +22 -0
evals_for_comparison.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model,arc_challenge_poly_pt_acc,arc_challenge_poly_pt_acc_norm,arc_challenge_poly_pt_acc_norm_stderr,arc_challenge_poly_pt_acc_stderr,arc_challenge_poly_pt_alias,assin2_rte_acc,assin2_rte_acc_stderr,assin2_rte_alias,assin2_rte_f1_macro,assin2_rte_f1_macro_stderr,assin2_sts_alias,assin2_sts_mse,assin2_sts_mse_stderr,assin2_sts_pearson,assin2_sts_pearson_stderr,assin_entailment_acc,assin_entailment_acc_stderr,assin_entailment_alias,assin_paraphrase_acc,assin_paraphrase_acc_stderr,assin_paraphrase_alias,belebele_por_Latn_acc,belebele_por_Latn_acc_norm,belebele_por_Latn_acc_norm_stderr,belebele_por_Latn_acc_stderr,belebele_por_Latn_alias,bluex_acc,"bluex_acc,exam_id__UNICAMP_2018","bluex_acc,exam_id__UNICAMP_2019","bluex_acc,exam_id__UNICAMP_2020","bluex_acc,exam_id__UNICAMP_2021_1","bluex_acc,exam_id__UNICAMP_2021_2","bluex_acc,exam_id__UNICAMP_2022","bluex_acc,exam_id__UNICAMP_2023","bluex_acc,exam_id__UNICAMP_2024","bluex_acc,exam_id__USP_2018","bluex_acc,exam_id__USP_2019","bluex_acc,exam_id__USP_2020","bluex_acc,exam_id__USP_2021","bluex_acc,exam_id__USP_2022","bluex_acc,exam_id__USP_2023","bluex_acc,exam_id__USP_2024",bluex_acc_stderr,"bluex_acc_stderr,exam_id__UNICAMP_2018","bluex_acc_stderr,exam_id__UNICAMP_2019","bluex_acc_stderr,exam_id__UNICAMP_2020","bluex_acc_stderr,exam_id__UNICAMP_2021_1","bluex_acc_stderr,exam_id__UNICAMP_2021_2","bluex_acc_stderr,exam_id__UNICAMP_2022","bluex_acc_stderr,exam_id__UNICAMP_2023","bluex_acc_stderr,exam_id__UNICAMP_2024","bluex_acc_stderr,exam_id__USP_2018","bluex_acc_stderr,exam_id__USP_2019","bluex_acc_stderr,exam_id__USP_2020","bluex_acc_stderr,exam_id__USP_2021","bluex_acc_stderr,exam_id__USP_2022","bluex_acc_stderr,exam_id__USP_2023","bluex_acc_stderr,exam_id__USP_2024",bluex_alias,calame_pt_acc,calame_pt_acc_stderr,calame_pt_alias,calame_pt_perplexity,calame_pt_perplexity_stderr,enem_challenge_acc,"enem_challenge_acc,exam_id__2009","enem_challenge_acc,exam_id__2010","enem_challenge_acc,exam_id__2011","enem_challenge_acc,exam_id__2012","enem_challenge_acc,exam_id__2013","enem_challenge_acc,exam_id__2014","enem_challenge_acc,exam_id__2015","enem_challenge_acc,exam_id__2016","enem_challenge_acc,exam_id__2016_2","enem_challenge_acc,exam_id__2017","enem_challenge_acc,exam_id__2022","enem_challenge_acc,exam_id__2023",enem_challenge_acc_stderr,"enem_challenge_acc_stderr,exam_id__2009","enem_challenge_acc_stderr,exam_id__2010","enem_challenge_acc_stderr,exam_id__2011","enem_challenge_acc_stderr,exam_id__2012","enem_challenge_acc_stderr,exam_id__2013","enem_challenge_acc_stderr,exam_id__2014","enem_challenge_acc_stderr,exam_id__2015","enem_challenge_acc_stderr,exam_id__2016","enem_challenge_acc_stderr,exam_id__2016_2","enem_challenge_acc_stderr,exam_id__2017","enem_challenge_acc_stderr,exam_id__2022","enem_challenge_acc_stderr,exam_id__2023",enem_challenge_alias,faquad_nli_acc,faquad_nli_acc_stderr,faquad_nli_alias,faquad_nli_f1_macro,faquad_nli_f1_macro_stderr,global_piqa_completions_por_latn_braz_acc,global_piqa_completions_por_latn_braz_acc_bytes,global_piqa_completions_por_latn_braz_acc_bytes_stderr,global_piqa_completions_por_latn_braz_acc_norm,global_piqa_completions_por_latn_braz_acc_norm_stderr,global_piqa_completions_por_latn_braz_acc_stderr,global_piqa_completions_por_latn_braz_alias,hatebr_offensive_acc,hatebr_offensive_acc_stderr,hatebr_offensive_alias,hatebr_offensive_f1_macro,hatebr_offensive_f1_macro_stderr,hellaswag_poly_pt_acc,hellaswag_poly_pt_acc_norm,hellaswag_poly_pt_acc_norm_stderr,hellaswag_poly_pt_acc_stderr,hellaswag_poly_pt_alias,lambada_poly_pt_acc,lambada_poly_pt_acc_stderr,lambada_poly_pt_alias,lambada_poly_pt_perplexity,lambada_poly_pt_perplexity_stderr,mmlu_poly_pt_acc,mmlu_poly_pt_acc_stderr,mmlu_poly_pt_alias,oab_exams_acc,"oab_exams_acc,exam_id__2010-01","oab_exams_acc,exam_id__2010-02","oab_exams_acc,exam_id__2011-03","oab_exams_acc,exam_id__2011-04","oab_exams_acc,exam_id__2011-05","oab_exams_acc,exam_id__2012-06","oab_exams_acc,exam_id__2012-06a","oab_exams_acc,exam_id__2012-07","oab_exams_acc,exam_id__2012-08","oab_exams_acc,exam_id__2012-09","oab_exams_acc,exam_id__2013-10","oab_exams_acc,exam_id__2013-11","oab_exams_acc,exam_id__2013-12","oab_exams_acc,exam_id__2014-13","oab_exams_acc,exam_id__2014-14","oab_exams_acc,exam_id__2014-15","oab_exams_acc,exam_id__2015-16","oab_exams_acc,exam_id__2015-17","oab_exams_acc,exam_id__2015-18","oab_exams_acc,exam_id__2016-19","oab_exams_acc,exam_id__2016-20","oab_exams_acc,exam_id__2016-20a","oab_exams_acc,exam_id__2016-21","oab_exams_acc,exam_id__2017-22","oab_exams_acc,exam_id__2017-23","oab_exams_acc,exam_id__2017-24","oab_exams_acc,exam_id__2018-25",oab_exams_acc_stderr,"oab_exams_acc_stderr,exam_id__2010-01","oab_exams_acc_stderr,exam_id__2010-02","oab_exams_acc_stderr,exam_id__2011-03","oab_exams_acc_stderr,exam_id__2011-04","oab_exams_acc_stderr,exam_id__2011-05","oab_exams_acc_stderr,exam_id__2012-06","oab_exams_acc_stderr,exam_id__2012-06a","oab_exams_acc_stderr,exam_id__2012-07","oab_exams_acc_stderr,exam_id__2012-08","oab_exams_acc_stderr,exam_id__2012-09","oab_exams_acc_stderr,exam_id__2013-10","oab_exams_acc_stderr,exam_id__2013-11","oab_exams_acc_stderr,exam_id__2013-12","oab_exams_acc_stderr,exam_id__2014-13","oab_exams_acc_stderr,exam_id__2014-14","oab_exams_acc_stderr,exam_id__2014-15","oab_exams_acc_stderr,exam_id__2015-16","oab_exams_acc_stderr,exam_id__2015-17","oab_exams_acc_stderr,exam_id__2015-18","oab_exams_acc_stderr,exam_id__2016-19","oab_exams_acc_stderr,exam_id__2016-20","oab_exams_acc_stderr,exam_id__2016-20a","oab_exams_acc_stderr,exam_id__2016-21","oab_exams_acc_stderr,exam_id__2017-22","oab_exams_acc_stderr,exam_id__2017-23","oab_exams_acc_stderr,exam_id__2017-24","oab_exams_acc_stderr,exam_id__2018-25",oab_exams_alias,portuguese_hate_speech_acc,portuguese_hate_speech_acc_stderr,portuguese_hate_speech_alias,portuguese_hate_speech_f1_macro,portuguese_hate_speech_f1_macro_stderr,tweetsentbr_acc,tweetsentbr_acc_stderr,tweetsentbr_alias,tweetsentbr_f1_macro,tweetsentbr_f1_macro_stderr
2
+ Carvalho_pt-gl-1.3B,0.22393162393162394,0.27008547008547007,0.01298611843358399,0.0121927158461456,arc_challenge_poly_pt,0.5110294117647058,0.007121097076497118,assin2_rte,0.36722671927079786,0.0049309185159497285,assin2_sts,2.0948284313725494,N/A,0.15133974075980117,0.013120771773132062,0.5895,0.007778982301137492,assin_entailment,0.59825,0.007752532477516862,assin_paraphrase,0.2644444444444444,0.2644444444444444,0.014709405413413016,0.014709405413413016,belebele_por_Latn,0.1933240611961057,0.18518518518518517,0.16,0.16363636363636364,0.30434782608695654,0.13725490196078433,0.2564102564102564,0.3023255813953488,0.2,0.12962962962962962,0.275,0.17857142857142858,0.19230769230769232,0.20408163265306123,0.1590909090909091,0.0975609756097561,0.008497175275346908,0.030449766220614852,0.029957185086876628,0.028779062044264246,0.039212298659560045,0.02781885209546522,0.04034762088286155,0.04033157238478193,0.03431993090478306,0.026533203541840373,0.040824905007708356,0.029408950586947324,0.03154800187262653,0.03320244846261979,0.03164818002655287,0.0267943828097282,bluex,0.5342003853564548,0.010950718381924837,calame_pt,13.465732334461586,1.122807086392345,0.18124562631210636,0.19130434782608696,0.20512820512820512,0.18803418803418803,0.19827586206896552,0.17592592592592593,0.1926605504587156,0.14285714285714285,0.10743801652892562,0.21951219512195122,0.22413793103448276,0.12781954887218044,0.2074074074074074,0.005867066527399794,0.021201349082465924,0.021509781766930424,0.020831837923742578,0.02126628150486553,0.021142506356778397,0.021752934272906366,0.01854353109204073,0.0162821622451065,0.021525405859569618,0.022399242178130562,0.016728442068782046,0.020240788149841194,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.61,0.63,0.048523658709390974,0.63,0.048523658709390974,0.04902071300001973,global_piqa_completions_por_latn_braz,0.46285714285714286,0.009432572356706628,hatebr_offensive_binary,0.39197789682567186,0.008462645611112886,0.3201863690540687,0.38530718387690976,0.005066159153273153,0.004856714348809714,hellaswag_poly_pt,0.3359208228216573,0.006580220803755733,lambada_poly_pt,31.93028814045103,1.1999785179314317,0.24819873911738216,0.0037424007920321208,mmlu_poly_pt,0.22323462414578588,0.29411764705882354,0.25,0.18181818181818182,0.3,0.25,0.225,0.225,0.175,0.25,0.24675324675324675,0.2,0.2125,0.2875,0.1625,0.2375,0.15384615384615385,0.3,0.20512820512820512,0.1875,0.1794871794871795,0.1625,0.2125,0.2,0.2375,0.2125,0.225,0.25,0.005110953310735318,0.02852907313936527,0.02501712709833526,0.022309719185212712,0.02958653840535198,0.027943826041476338,0.026853093879096398,0.026893147358436273,0.024575277445235962,0.027968627192314383,0.028309503192208558,0.025897487183541342,0.026441985895778897,0.029164105686928128,0.023862471107454333,0.027538762731416165,0.02355118821715844,0.02971147269537897,0.02637170767976474,0.02517321871469788,0.025161625347658326,0.0238376856830912,0.026186653612123287,0.02572248141191945,0.027424272094260852,0.026468872980762587,0.026975041037688656,0.02793563525784451,oab_exams,0.6321974148061105,0.011693058479081022,portuguese_hate_speech_binary,0.4876619791997261,0.011879060636157512,0.3024875621890547,0.007252528147349198,tweetsentbr,0.25686385409967155,0.005652318632223181
3
+ Curio-1.1b,0.26153846153846155,0.30427350427350425,0.013456870841978025,0.01285359072080353,arc_challenge_poly_pt,0.5919117647058824,0.007014725978673307,assin2_rte,0.514218466536214,0.007222733992427019,assin2_sts,2.4836723856209146,N/A,0.07991975541603662,0.013180723122871494,0.653,0.0075274109412778656,assin_entailment,0.61125,0.007708483284903614,assin_paraphrase,0.2288888888888889,0.2288888888888889,0.014011705158884454,0.014011705158884454,belebele_por_Latn,0.21557719054242003,0.2962962962962963,0.22,0.2,0.13043478260869565,0.23529411764705882,0.1794871794871795,0.32558139534883723,0.24444444444444444,0.2222222222222222,0.3,0.19642857142857142,0.07692307692307693,0.2653061224489796,0.1590909090909091,0.1951219512195122,0.008823902744351235,0.035878728468168884,0.03382089422222721,0.03110029497727864,0.028626027011646942,0.03429345074874486,0.03538376174249052,0.041243547678139994,0.03687304959885912,0.03254702284566478,0.0417490424618959,0.03059453365030812,0.021297788445280696,0.036461254097591725,0.0318491740941458,0.03562465998587211,bluex,0.5924855491329479,0.010787016363427675,calame_pt,7.195442726622208,0.4281907634053562,0.2106368089573128,0.2,0.19658119658119658,0.23931623931623933,0.20689655172413793,0.23148148148148148,0.22018348623853212,0.18487394957983194,0.21487603305785125,0.1951219512195122,0.21551724137931033,0.15789473684210525,0.26666666666666666,0.006208284363274197,0.021569958843596873,0.02130314982962761,0.022856909298189693,0.021759337613029544,0.023411090180959052,0.022870606329773658,0.020496532313505404,0.021535831633747355,0.02064902231703425,0.02196772284083863,0.018307324799023198,0.021893757442561394,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.79,0.76,0.04292346959909278,0.75,0.04351941398892446,0.040936018074033236,global_piqa_completions_por_latn_braz,0.49714285714285716,0.009469462937884303,hatebr_offensive_binary,0.3333170991717872,0.004372114303048875,0.38563224618051795,0.4945281178892621,0.00520463410206157,0.00506695543437015,hellaswag_poly_pt,0.4669124781680574,0.00695070843917992,lambada_poly_pt,13.387099808829497,0.42266971716683754,0.26350945661963376,0.003816635418003369,mmlu_poly_pt,0.23097949886104785,0.25882352941176473,0.23,0.23232323232323232,0.2625,0.225,0.2625,0.25,0.125,0.2125,0.22077922077922077,0.2,0.175,0.175,0.2375,0.2625,0.21794871794871795,0.225,0.24358974358974358,0.25,0.20512820512820512,0.2375,0.3,0.2,0.2625,0.225,0.25,0.2875,0.005194738785487805,0.027439944269300902,0.02428872342290118,0.024426608214068245,0.028365615520511454,0.02679842510246724,0.028440985179851747,0.02797568706326084,0.021279319778936188,0.026307154808309953,0.027338461010722775,0.025770456209499467,0.024437944365759502,0.024512681128397884,0.027522314022461036,0.028398873102793116,0.02693219019792365,0.026902677970515577,0.028004812947648394,0.02792162562280424,0.02643403297269708,0.027511954249563694,0.029515320743474856,0.02581943433410064,0.02836748660516128,0.027006123832793466,0.02796358718650669,0.029248628986210203,oab_exams,0.700352526439483,0.011073173117602544,portuguese_hate_speech_binary,0.4118866620594333,0.003830312503656316,0.30199004975124377,0.00722431375874725,tweetsentbr,0.2207190928852396,0.005560126792343992
4
+ GlorIA-1.3B,0.2282051282051282,0.2641025641025641,0.012894000957103741,0.012274572870357178,arc_challenge_poly_pt,0.0004084967320261438,0.0002888875468443105,assin2_rte,0.0008163265306122449,0.0005766197511083887,assin2_sts,3.0876144825708067,N/A,0.0018499252277953317,0.007626452306649621,0.62925,0.007637944351949736,assin_entailment,0.55525,0.007858263004593467,assin_paraphrase,0.22777777777777777,0.22777777777777777,0.013987721523687968,0.013987721523687968,belebele_por_Latn,0.043115438108484005,0.018518518518518517,0.06,0.0,0.08695652173913043,0.0392156862745098,0.02564102564102564,0.09302325581395349,0.0,0.05555555555555555,0.1,0.03571428571428571,0.038461538461538464,0.0,0.045454545454545456,0.07317073170731707,0.004379342035534402,0.01059849662524772,0.019455605444454217,0.0,0.02393813187988601,0.015672260261461707,0.014639790890850891,0.025613638507697108,0.0,0.01803138803731403,0.027391859575950646,0.0143188947970846,0.0153099274583577,0.0,0.018180821704521836,0.02350686444300002,bluex,0.5467244701348748,0.010928393964626257,calame_pt,8.81407725272937,0.5274008214469735,0.025192442267319804,0.05217391304347826,0.03418803418803419,0.008547008547008548,0.008620689655172414,0.037037037037037035,0.0,0.025210084033613446,0.03305785123966942,0.032520325203252036,0.04310344827586207,0.022556390977443608,0.007407407407407408,0.0023935785702390027,0.011981174616391144,0.009670391152263931,0.004917264857958575,0.004962849441373942,0.010464469752796527,0.0,0.008302822714234626,0.009369729407355793,0.009204065048684457,0.01091399050337258,0.007415628468143703,0.004244409187738494,enem,0.004615384615384616,0.0018805519242616397,faquad_nli,0.005836575875486381,0.0023605476210659514,0.66,0.65,0.04793724854411023,0.64,0.048241815132442176,0.04760952285695234,global_piqa_completions_por_latn_braz,0.004285714285714286,0.001234337642003019,hatebr_offensive_binary,0.008429334643985983,0.0024104858703793983,0.3109762704518366,0.3635280095351609,0.005007313634804167,0.004818667078462506,hellaswag_poly_pt,0.3667766349699204,0.00671415509873292,lambada_poly_pt,26.92767482018332,1.050217526666902,0.2369408586010207,0.0036838171444530956,mmlu_poly_pt,0.0469248291571754,0.058823529411764705,0.04,0.09090909090909091,0.05,0.075,0.0875,0.0625,0.05,0.025,0.1038961038961039,0.075,0.05,0.0875,0.05,0.0375,0.02564102564102564,0.0125,0.01282051282051282,0.05,0.0,0.0125,0.0125,0.025,0.05,0.0625,0.0375,0.0125,0.0026049402638646125,0.014750854776482417,0.011335157474383,0.01661901952847366,0.01408954349911562,0.017021321300287107,0.01827919646216392,0.015673513064935862,0.014005011769876527,0.010057543044677483,0.02009525813341219,0.017041820724611557,0.01406172841886376,0.018240863584359043,0.014023322407811277,0.012249641667408813,0.010351161941038375,0.007161586886211282,0.007357250103925693,0.014026152282691657,0.0,0.007172064350659512,0.007181123936471607,0.010057687256223139,0.014076260352637053,0.015643585548775412,0.01226066352941023,0.007141410666057684,oab_exams,0.6733254994124559,0.011312241691053231,portuguese_hate_speech_binary,0.4069602272727273,0.003975510948938515,0.001990049751243781,0.0007044015003721749,tweetsentbr,0.005040957781978576,0.001765027657389317
5
+ Llama-2-7b-hf,0.3769230769230769,0.42136752136752137,0.014441911411639847,0.014173918836988375,arc_challenge_poly_pt,0.6740196078431373,0.006710835620456361,assin2_rte,0.6678362795835099,0.006792300665167961,assin2_sts,1.7663888888888888,N/A,0.3048110215939166,0.015788404694505424,0.65525,0.007515881950601263,assin_entailment,0.58025,0.007804179052127739,assin_paraphrase,0.41444444444444445,0.41444444444444445,0.01642999261414865,0.01642999261414865,belebele_por_Latn,0.3129346314325452,0.3148148148148148,0.38,0.3090909090909091,0.2391304347826087,0.39215686274509803,0.3333333333333333,0.37209302325581395,0.4888888888888889,0.2962962962962963,0.175,0.25,0.3076923076923077,0.30612244897959184,0.2727272727272727,0.24390243902439024,0.009969505619270552,0.036329256876598826,0.039678294134804136,0.03599346924968398,0.03633164373212459,0.03949197855784038,0.04366807216063458,0.04255485170503524,0.04291632318879217,0.0358869701716168,0.034720782775491156,0.03348223483875644,0.036975503668589496,0.03798368540283085,0.038623954103394025,0.0387369030750057,bluex,0.5452793834296724,0.010931325109242962,calame_pt,8.381227605916514,0.4652707283022765,0.317704688593422,0.3217391304347826,0.3504273504273504,0.3333333333333333,0.3275862068965517,0.21296296296296297,0.30275229357798167,0.2857142857142857,0.24793388429752067,0.3089430894308943,0.27586206896551724,0.39849624060150374,0.4148148148148148,0.0071185069041738306,0.02513586335459504,0.0254514055801029,0.025154905676559303,0.025168774307588054,0.022746100670533613,0.025444626853490256,0.02390828986432756,0.02265354682908215,0.023974505172430216,0.023930503420551884,0.0244697989348971,0.024577601923143854,enem,0.7892307692307692,0.011315498087712138,faquad_nli,0.5387420425880172,0.015947697618420492,0.69,0.68,0.046882617226215076,0.67,0.04725815626252609,0.046482319871173176,global_piqa_completions_por_latn_braz,0.7564285714285715,0.008124405528975262,hatebr_offensive_binary,0.750087814765549,0.008301599688513002,0.41976378805937803,0.5675587820999025,0.0051572142265343,0.00513749138787099,hellaswag_poly_pt,0.5973219483795847,0.006832746189478245,lambada_poly_pt,8.469219625628918,0.28092071097480287,0.3863704593215251,0.004218461331907255,mmlu_poly_pt,0.3548974943052392,0.27058823529411763,0.31,0.3434343434343434,0.3625,0.3,0.35,0.3,0.375,0.4375,0.3246753246753247,0.2875,0.375,0.35,0.3875,0.3625,0.4230769230769231,0.3375,0.358974358974359,0.4,0.44871794871794873,0.425,0.275,0.35,0.3375,0.4,0.325,0.3875,0.005882846962349383,0.02790378492763792,0.026622482518022635,0.02751362330789859,0.030973172980141666,0.029480157252199733,0.030755583175643007,0.029510939434402104,0.031215644676217697,0.03190207312649421,0.030694762362786347,0.02924104094032293,0.03120076350085287,0.03086385741014139,0.031400973951021996,0.03108674319936362,0.03224265154396268,0.030479841084567003,0.0313349550880687,0.0315979581733737,0.03254920719533194,0.03191548440209446,0.028829153772694814,0.030757615157388465,0.030574793591327053,0.03159012915048848,0.03026209369911906,0.031420210587767665,oab_exams,0.7320799059929495,0.01068446733281971,portuguese_hate_speech_binary,0.5548101103177371,0.013460275567368795,0.6447761194029851,0.007536589477257637,tweetsentbr,0.5906478183333727,0.0077926677071889685
6
+ Llama-3.2-1B,0.2794871794871795,0.3170940170940171,0.01361028524106663,0.013124855972633702,arc_challenge_poly_pt,0.5882352941176471,0.0070218123805726815,assin2_rte,0.507334525939177,0.007188397754089635,assin2_sts,2.224583333333334,N/A,0.045955780853430404,0.012593613823311246,0.5435,0.00787670287061966,assin_entailment,0.592,0.007771685774331316,assin_paraphrase,0.3333333333333333,0.3333333333333333,0.015722221021219274,0.015722221021219274,belebele_por_Latn,0.24061196105702365,0.3333333333333333,0.22,0.2545454545454545,0.2391304347826087,0.35294117647058826,0.28205128205128205,0.27906976744186046,0.2222222222222222,0.18518518518518517,0.2,0.32142857142857145,0.07692307692307693,0.1836734693877551,0.29545454545454547,0.14634146341463414,0.00918450813863984,0.036920433345020556,0.03381708124028534,0.03385352790532285,0.036270575701766075,0.03846626679573715,0.04152836931393151,0.039481081903425225,0.03584521565371374,0.03059776624392069,0.03660010443949587,0.036019270828831336,0.02127476126697161,0.031877573491931877,0.039631705028242745,0.03182728636714712,bluex,0.5,0.010976425998969034,calame_pt,12.201208946373502,0.7494398762866994,0.23932820153953813,0.20869565217391303,0.23076923076923078,0.20512820512820512,0.25,0.19444444444444445,0.22018348623853212,0.2773109243697479,0.2231404958677686,0.25203252032520324,0.22413793103448276,0.2932330827067669,0.2740740740740741,0.0065391582166557044,0.021821194193248256,0.022577833785874215,0.021571765934130793,0.02317701743320838,0.02200503042160424,0.022949570275477015,0.0236507442208502,0.021868312709775902,0.022560290849782245,0.02234576698789731,0.022739146188413245,0.022134402329914975,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.55,0.54,0.05009082659620331,0.55,0.05,0.05,global_piqa_completions_por_latn_braz,0.6157142857142858,0.009159942279698899,hatebr_offensive_binary,0.5942501168982005,0.0094025460421741,0.35550980604615884,0.45270343482500813,0.005181606909540997,0.004982876876778654,hellaswag_poly_pt,0.4560450223170968,0.006939008354532598,lambada_poly_pt,16.9742648348312,0.6451741398076515,0.2851245872110477,0.003911393421434831,mmlu_poly_pt,0.2605922551252847,0.25882352941176473,0.3,0.26262626262626265,0.3375,0.2,0.275,0.275,0.225,0.325,0.2597402597402597,0.225,0.175,0.25,0.25,0.2875,0.2948717948717949,0.2875,0.2564102564102564,0.2375,0.20512820512820512,0.225,0.2875,0.2875,0.3125,0.2625,0.1875,0.275,0.005386461986184192,0.027440384867471097,0.02645380776413245,0.025488552454651366,0.03039472671103596,0.02583395097564109,0.028848708383473334,0.028810049821713913,0.02686101591476683,0.030166552736214616,0.028870451352124248,0.026901014069584228,0.02448435056318406,0.028030316322959166,0.027952304262603783,0.029211899882853876,0.02982724942919855,0.02914256967391907,0.02849085518421814,0.02745240596592866,0.026427350986477426,0.026916377450888172,0.029243944726852406,0.0292832531672908,0.029848826022562277,0.028356018281794258,0.025083281910977917,0.028904228657660853,oab_exams,0.39600470035252644,0.011827366874554504,portuguese_hate_speech_binary,0.37895558836892324,0.011491627437349052,0.5532338308457712,0.007826422837717034,tweetsentbr,0.4253570002160661,0.005032561246096109
7
+ Llama-3.2-3B,0.39145299145299145,0.4111111111111111,0.01439093566986938,0.014275115095154002,arc_challenge_poly_pt,0.8370098039215687,0.005271533529536472,assin2_rte,0.8363872913848014,0.0052927522344757795,assin2_sts,1.5979820261437914,N/A,0.4723223768599063,0.013196880554443183,0.64125,0.007584622007572259,assin_entailment,0.62475,0.007656632117246114,assin_paraphrase,0.6888888888888889,0.6888888888888889,0.015440185195103361,0.015440185195103361,belebele_por_Latn,0.5034770514603616,0.5185185185185185,0.56,0.4909090909090909,0.45652173913043476,0.49019607843137253,0.5384615384615384,0.5348837209302325,0.6222222222222222,0.48148148148148145,0.4,0.5714285714285714,0.36538461538461536,0.4489795918367347,0.5681818181818182,0.5121951219512195,0.010784367733402467,0.039165666800610115,0.040728414317956284,0.03895531664562946,0.04244168089400983,0.04031069876876898,0.04629846752153336,0.04390954993011661,0.04178726442093161,0.039215919397742986,0.04470273497239075,0.038062256062353576,0.038424354112899364,0.04104850469218764,0.04313526206364078,0.04516005714110088,bluex,0.5447976878612717,0.010932281525615668,calame_pt,8.003852501596521,0.4409859628025262,0.5304408677396781,0.5043478260869565,0.5213675213675214,0.6239316239316239,0.5086206896551724,0.5277777777777778,0.5963302752293578,0.5210084033613446,0.5371900826446281,0.4715447154471545,0.5344827586206896,0.48872180451127817,0.5407407407407407,0.0076415314991669404,0.0268973592518023,0.026752467437752142,0.025942928440247478,0.0267542486007239,0.027749293271461358,0.02712643400103646,0.026432197593190442,0.02610986998676507,0.02597524120559948,0.026715666252514886,0.025016411349356477,0.024836640347158843,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.72,0.68,0.046882617226215076,0.69,0.046482319871173176,0.045126085985421296,global_piqa_completions_por_latn_braz,0.7571428571428571,0.008111455971047425,hatebr_offensive_binary,0.7457786436970194,0.008429268851149004,0.43677538194820675,0.5913966843645032,0.005117249508496108,0.005163166113582872,hellaswag_poly_pt,0.5947991461284688,0.006839626982657816,lambada_poly_pt,8.102312909828086,0.258586819721004,0.48281296907835486,0.004329245983420869,mmlu_poly_pt,0.39453302961275627,0.32941176470588235,0.34,0.32323232323232326,0.4375,0.425,0.425,0.375,0.3125,0.4625,0.2727272727272727,0.3,0.5,0.375,0.4,0.425,0.5512820512820513,0.3125,0.48717948717948717,0.4,0.44871794871794873,0.475,0.3375,0.3875,0.5,0.3875,0.3125,0.3875,0.00603241375384913,0.029571025252496663,0.027403595798880208,0.027078861371727332,0.03208308359910106,0.031939201054921235,0.03189277843263085,0.03127909003146793,0.030007658958197937,0.0321612457617914,0.029233096626502884,0.029543323729176486,0.03239418262733038,0.03126001323255004,0.03160047198458112,0.031904806510253514,0.03248011969191096,0.02987921282067121,0.03262325568181619,0.03156103918751904,0.032507059368943575,0.03233430128575697,0.030481805842254066,0.03135688066498188,0.03233888100328145,0.031564680209607884,0.029926368391597836,0.031532263121308836,oab_exams,0.43243243243243246,0.012014774583479056,portuguese_hate_speech_binary,0.42091797560161226,0.011882654516991104,0.6796019900497512,0.007360927617991569,tweetsentbr,0.6142907530646502,0.007961952207208857
8
+ Qwen2.5-0.5B,0.26153846153846155,0.2846153846153846,0.01319750175491955,0.01285359072080353,arc_challenge_poly_pt,0.6356209150326797,0.006892532585647943,assin2_rte,0.582109295339929,0.007331226180458947,assin2_sts,1.8191380718954249,N/A,0.2901051588547862,0.01347324325351133,0.701,0.007239672175817309,assin_entailment,0.62925,0.007637944351949736,assin_paraphrase,0.4955555555555556,0.4955555555555556,0.01667527483379586,0.01667527483379586,belebele_por_Latn,0.325452016689847,0.24074074074074073,0.2,0.2545454545454545,0.30434782608695654,0.39215686274509803,0.4358974358974359,0.4883720930232558,0.37777777777777777,0.25925925925925924,0.3,0.375,0.28846153846153844,0.2653061224489796,0.38636363636363635,0.3902439024390244,0.010082949629508913,0.033650314648457434,0.03276725586342953,0.03403347276901709,0.03912212653786782,0.03928919893383506,0.045891801472824775,0.043825875164847,0.041738724241390086,0.034394197048130855,0.041852902294011604,0.03729898816220018,0.0363052964664553,0.036366440583883324,0.042394994862904316,0.04397503659041109,bluex,0.4489402697495183,0.010919042809603362,calame_pt,21.857457304437343,1.5350156062590206,0.38908327501749473,0.391304347826087,0.4700854700854701,0.42735042735042733,0.3879310344827586,0.4074074074074074,0.44036697247706424,0.31932773109243695,0.2809917355371901,0.37398373983739835,0.3879310344827586,0.3609022556390977,0.42962962962962964,0.007443284169875556,0.02625477197105158,0.026698654612265383,0.02640830173636786,0.026131583879541093,0.02722211953112732,0.027450655417084743,0.024631456820002555,0.023515388127700623,0.025257358525568874,0.026173021978641242,0.023974392036239137,0.024587339002947985,enem,0.7676923076923077,0.011634956067061063,faquad_nli,0.665723267227257,0.015215819269968494,0.44,0.44,0.049888765156985884,0.44,0.049888765156985884,0.049888765156985884,global_piqa_completions_por_latn_braz,0.6107142857142858,0.009229707694107308,hatebr_offensive_binary,0.566524314690849,0.00959163875916625,0.3145519557915267,0.37696391808429947,0.005044901576722374,0.004833699777963256,hellaswag_poly_pt,0.3908402872113332,0.006797939909671732,lambada_poly_pt,38.324870997729285,1.685322534048743,0.41166316421495047,0.004263664369353975,mmlu_poly_pt,0.358997722095672,0.29411764705882354,0.35,0.32323232323232326,0.275,0.4125,0.3875,0.375,0.3125,0.425,0.2987012987012987,0.3625,0.3875,0.375,0.425,0.3625,0.4358974358974359,0.3,0.3974358974358974,0.375,0.358974358974359,0.3625,0.2875,0.3625,0.3625,0.35,0.3,0.45,0.005934582341912763,0.028537035648693594,0.027517502965602035,0.027091760524502472,0.02886833030782253,0.03179221841723252,0.031389141697367944,0.031262856934866745,0.02982785287745174,0.031897338698164523,0.030185742336241175,0.031014561831275503,0.03150815814233259,0.03127696119756283,0.032049634138775644,0.031104393327476148,0.03240113111629356,0.029629695368752505,0.031948924207191524,0.03107712701916621,0.031433955092016526,0.031067171254792914,0.029142167459943837,0.030980891788359582,0.031062892503303612,0.03091816794663904,0.029566254244854904,0.032182187782696124,oab_exams,0.5569917743830788,0.011944134386312229,portuguese_hate_speech_binary,0.5227329865207218,0.01211134060634965,0.5805970149253732,0.007773706554450188,tweetsentbr,0.43401265897663416,0.005195883847336149
9
+ Qwen2.5-3B,0.44017094017094016,0.45213675213675214,0.014556726873374498,0.014518814188511486,arc_challenge_poly_pt,0.9170751633986928,0.003939312081073755,assin2_rte,0.9169887127006381,0.003944460877141988,assin2_sts,0.5061887254901961,N/A,0.7763270366363294,0.006692979895730144,0.715,0.0071383814077338684,assin_entailment,0.688,0.007326487518234008,assin_paraphrase,0.8322222222222222,0.8322222222222222,0.012462553256116676,0.012462553256116676,belebele_por_Latn,0.5827538247566064,0.5925925925925926,0.64,0.4727272727272727,0.5652173913043478,0.5882352941176471,0.6923076923076923,0.6744186046511628,0.5111111111111111,0.5370370370370371,0.45,0.5892857142857143,0.5961538461538461,0.5918367346938775,0.6363636363636364,0.6341463414634146,0.010636833554687496,0.038687491939200874,0.03926334116684468,0.03879934119295361,0.04214714243883002,0.03973335538577338,0.04262635239727625,0.041215315107312335,0.043080788741425874,0.0391388935819693,0.04541838835163272,0.03804495809100424,0.03920556071708757,0.04046538471070396,0.041868143754403626,0.04332142210701962,bluex,0.5838150289017341,0.010821109243860677,calame_pt,7.597552184503539,0.44479254697925036,0.6731980405878236,0.6869565217391305,0.6752136752136753,0.7435897435897436,0.6810344827586207,0.7222222222222222,0.7064220183486238,0.6386554621848739,0.6611570247933884,0.6747967479674797,0.6637931034482759,0.5864661654135338,0.6592592592592592,0.007194030926874016,0.02496999553491974,0.024955223728201237,0.023276174630836574,0.02498213421385553,0.024980672848601875,0.025260725689536388,0.025445451452214673,0.024822156988422855,0.024455830573068174,0.025270639670846015,0.024749537863050432,0.02352539411298557,enem,0.7846153846153846,0.011392316014646743,faquad_nli,0.45333525567089583,0.007989172949935636,0.75,0.74,0.0440844002276808,0.75,0.04351941398892446,0.04351941398892446,global_piqa_completions_por_latn_braz,0.7328571428571429,0.008389078412170015,hatebr_offensive_binary,0.7161136412925613,0.008792426731008484,0.43601690323978765,0.5944305991981796,0.0051112765125008045,0.005162153495320684,hellaswag_poly_pt,0.5717058024451775,0.006893971254195014,lambada_poly_pt,8.569018057605742,0.27914775660881014,0.5979435604923447,0.004247883463578573,mmlu_poly_pt,0.5034168564920274,0.4,0.53,0.42424242424242425,0.4875,0.425,0.5,0.6625,0.525,0.55,0.42857142857142855,0.4625,0.5625,0.5375,0.4625,0.4875,0.6025641025641025,0.475,0.6410256410256411,0.5125,0.5256410256410257,0.5125,0.4875,0.4125,0.5875,0.425,0.5375,0.45,0.006172707782833441,0.030600018069266433,0.028728984587133886,0.028586186890984674,0.03221643904388358,0.03196094550935526,0.032152792736914175,0.03047478197918604,0.032366496853503506,0.03204946359038111,0.032556085593172045,0.032220338952638414,0.031924166513903056,0.032150075403767395,0.032266656904429133,0.03238145017512751,0.032014812835491784,0.032250700956110316,0.031256542274803914,0.03218010119805264,0.03259419298840892,0.03221543471530624,0.032342397948467744,0.03164329148544411,0.031724519062653306,0.03192700235062703,0.0320424267324665,0.03215027215619964,oab_exams,0.6380728554641598,0.01167397067005936,portuguese_hate_speech_binary,0.6256541212495144,0.01190240741453681,0.709452736318408,0.007153067328853374,tweetsentbr,0.6293347848436309,0.007718404140238938
10
+ Qwen2.5-7B,0.5119658119658119,0.5418803418803418,0.014572494714128921,0.014619696199825475,arc_challenge_poly_pt,0.9272875816993464,0.003702103145330779,assin2_rte,0.9272467417111068,0.003704943931398557,assin2_sts,0.5438112745098039,N/A,0.7801425978893841,0.0073521701540925,0.63675,0.007605216578428336,assin_entailment,0.64325,0.007575236328086181,assin_paraphrase,0.8966666666666666,0.8966666666666666,0.010152099984713354,0.010152099984713354,belebele_por_Latn,0.6592489568845619,0.6851851851851852,0.64,0.6181818181818182,0.6521739130434783,0.6862745098039216,0.6410256410256411,0.7209302325581395,0.6,0.5555555555555556,0.55,0.7142857142857143,0.6346153846153846,0.5918367346938775,0.7954545454545454,0.8292682926829268,0.010222239959742748,0.03657877655034028,0.039379022737590266,0.037884719993605855,0.040550018219306504,0.037445717092779225,0.04428025716985036,0.03947443695247531,0.04224794985304817,0.039031104471524344,0.04535895108661322,0.03463106086950423,0.038570889341794624,0.04056613738828131,0.035060536595842184,0.033820492427867055,bluex,0.5895953757225434,0.010798765405764698,calame_pt,6.331016318891301,0.3498931986794624,0.7501749475157453,0.7565217391304347,0.8205128205128205,0.7948717948717948,0.75,0.6944444444444444,0.7889908256880734,0.773109243697479,0.6942148760330579,0.7235772357723578,0.7586206896551724,0.6842105263157895,0.7703703703703704,0.006661895905546727,0.0230960619128479,0.020508716641970074,0.021537352204701245,0.023223759558583162,0.02564289715317172,0.022616043639432867,0.022186650250654097,0.024020188075864347,0.023290684381499532,0.02292120020741255,0.023255976810932888,0.020909977995460793,enem,0.8276923076923077,0.010442708425632503,faquad_nli,0.6354823849866811,0.017996989044378166,0.79,0.77,0.042295258468165065,0.78,0.041633319989322654,0.040936018074033236,global_piqa_completions_por_latn_braz,0.7392857142857143,0.008300264465470912,hatebr_offensive_binary,0.7215852099089513,0.008739305997678607,0.49051901614476107,0.6791635063387149,0.004859317305172292,0.0052040099796551425,hellaswag_poly_pt,0.5951872695517174,0.006838580607651445,lambada_poly_pt,6.432792232973639,0.19118261048411583,0.685454818372861,0.004022813707346862,mmlu_poly_pt,0.5503416856492027,0.38823529411764707,0.57,0.46464646464646464,0.5,0.5375,0.5625,0.5625,0.5625,0.5125,0.5324675324675324,0.5875,0.575,0.65,0.4625,0.65,0.6025641025641025,0.4875,0.6666666666666666,0.55,0.5512820512820513,0.6125,0.55,0.425,0.625,0.55,0.5875,0.5625,0.006129783902934322,0.030552229774308392,0.028577024167595284,0.02893042370223487,0.032286728154301204,0.03210941038511149,0.03195508508778123,0.031913172881867735,0.03211288446698855,0.032255907207533074,0.0327848154083794,0.031703903207164104,0.03196955210984693,0.030743705436552658,0.032209069054420135,0.03086449393894805,0.031997469262833304,0.0322829061212139,0.030828652197116526,0.03192865827819913,0.03252049281297473,0.03132678982478445,0.03204689103251604,0.031885231734944924,0.031297291252952004,0.03230994062185136,0.03180715160406869,0.03201234368023496,oab_exams,0.6768507638072856,0.011383039375693928,portuguese_hate_speech_binary,0.6606390614917235,0.011751775210018398,0.7119402985074627,0.007144862699026351,tweetsentbr,0.6880382818610098,0.007475190359829984
11
+ Qwen3-0.6B-Base,0.3213675213675214,0.36923076923076925,0.01411487176822312,0.01365875315339595,arc_challenge_poly_pt,0.7704248366013072,0.0060237261370196075,assin2_rte,0.7616384755961914,0.006221506932127148,assin2_sts,1.4520710784313728,N/A,0.4787415119764903,0.012103438978960566,0.66175,0.007481525758955888,assin_entailment,0.611,0.007709384453519168,assin_paraphrase,0.65,0.65,0.015907826828129946,0.015907826828129946,belebele_por_Latn,0.4297635605006954,0.3148148148148148,0.34,0.4727272727272727,0.41304347826086957,0.3333333333333333,0.4358974358974359,0.46511627906976744,0.6222222222222222,0.37037037037037035,0.4,0.39285714285714285,0.46153846153846156,0.40816326530612246,0.5227272727272727,0.5609756097560976,0.010624069621874717,0.036504618541260525,0.038668219887727256,0.03885628962492922,0.04201135199358149,0.03807259373181974,0.045857861327211424,0.04400973662009723,0.04180445370137337,0.03778144139045231,0.04475520070581074,0.03744340894874133,0.039901359534229304,0.04062284693882503,0.04345631178709593,0.04474531499109573,bluex,0.4595375722543353,0.01094042557576075,calame_pt,19.429684823176704,1.3662099459280042,0.4947515745276417,0.5652173913043478,0.46153846153846156,0.5470085470085471,0.5258620689655172,0.46296296296296297,0.48623853211009177,0.42016806722689076,0.4297520661157025,0.5121951219512195,0.4827586206896552,0.48872180451127817,0.5481481481481482,0.007646956951004644,0.026680020872831608,0.026663293757136372,0.02656218674585355,0.02675179130903428,0.027676460836344237,0.027686666677030753,0.026158039399798605,0.025863565782438428,0.02599169178475259,0.02683500387476367,0.02497946710050272,0.024686802030459,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.55,0.53,0.05016135580465919,0.54,0.05009082659620331,0.05,global_piqa_completions_por_latn_braz,0.6278571428571429,0.009168853386937787,hatebr_offensive_binary,0.5726983914970847,0.00969282565383114,0.3318886119839636,0.40329396467656303,0.005106663999239563,0.004901926777428024,hellaswag_poly_pt,0.4178148651271104,0.006871231372591393,lambada_poly_pt,29.07542337165298,1.2510732337453463,0.4353797658360853,0.004295476285063696,mmlu_poly_pt,0.40455580865603646,0.2823529411764706,0.51,0.31313131313131315,0.425,0.4,0.3125,0.4875,0.4125,0.4125,0.2597402597402597,0.4625,0.35,0.4,0.425,0.4,0.4358974358974359,0.4125,0.47435897435897434,0.3875,0.44871794871794873,0.4375,0.3875,0.3875,0.425,0.4125,0.4125,0.45,0.006028693932865708,0.028157965772127796,0.028835673371721146,0.02685459507906835,0.03196459655996749,0.031601704726405444,0.029964573068397327,0.03227482301636205,0.031906786967377136,0.03181370581526733,0.028728093750264864,0.03224888801705576,0.030718715172759176,0.0315970809086799,0.0319194839832204,0.03161122832469226,0.03235436483114306,0.031774039555897145,0.03268764884130568,0.03147880361774557,0.03246506986525575,0.03203498587631824,0.03141339331287629,0.03149790293656256,0.031889666836221034,0.03181995531259529,0.03164646789285445,0.032221626718954115,oab_exams,0.6486486486486487,0.011582703889274007,portuguese_hate_speech_binary,0.6263251960166653,0.012003271468011147,0.6293532338308457,0.007636038809353393,tweetsentbr,0.5767288787146412,0.008258349957160283
12
+ Qwen3-1.7B-Base,0.42735042735042733,0.4717948717948718,0.014600598518935729,0.014468692480631705,arc_challenge_poly_pt,0.8913398692810458,0.004449394225815787,assin2_rte,0.8908853267736194,0.004468078977072449,assin2_sts,0.9090686274509804,N/A,0.6505105647565455,0.00864077185171433,0.69225,0.007298856259762564,assin_entailment,0.6875,0.0073296908933561476,assin_paraphrase,0.7788888888888889,0.7788888888888889,0.01384086369985948,0.01384086369985948,belebele_por_Latn,0.5716272600834492,0.6111111111111112,0.6,0.5818181818181818,0.5652173913043478,0.5490196078431373,0.5641025641025641,0.627906976744186,0.4666666666666667,0.4074074074074074,0.65,0.5178571428571429,0.6538461538461539,0.46938775510204084,0.6818181818181818,0.6829268292682927,0.010661289718477294,0.03823325552043678,0.04014183610342833,0.03836527563104238,0.04230489462340477,0.040285202038357004,0.04573143376509864,0.042649639713054896,0.0428522805887491,0.03868309130292124,0.043371177837045354,0.03836943542156757,0.03801987546167963,0.041102117969441736,0.04053162211074321,0.04195263458086596,bluex,0.535645472061657,0.010948497180726966,calame_pt,9.280871844047466,0.5538874697904987,0.6522043386983905,0.6260869565217392,0.6752136752136753,0.7606837606837606,0.646551724137931,0.5925925925925926,0.6422018348623854,0.680672268907563,0.6033057851239669,0.6585365853658537,0.6810344827586207,0.5939849624060151,0.6666666666666666,0.007290667201491062,0.025962923122790242,0.025060133930120108,0.022723836085722437,0.025698311482693952,0.027231014985607642,0.026522784512140768,0.02468916214399656,0.025637078705064726,0.024726327668630935,0.02498585053055827,0.024584361288093815,0.02339097338321123,enem,0.7923076923076923,0.011213547037175677,faquad_nli,0.5409100183635992,0.016017436556314785,0.73,0.67,0.04725815626252609,0.67,0.04725815626252609,0.04461960433384737,global_piqa_completions_por_latn_braz,0.8421428571428572,0.006889625530000462,hatebr_offensive_binary,0.8402347217352554,0.006971115006523322,0.3978762596164265,0.525517390833243,0.0051981631117286094,0.005095221820265538,hellaswag_poly_pt,0.5080535610324083,0.006965073974108908,lambada_poly_pt,12.357850297081347,0.44163166752665356,0.5548634043830681,0.004305649537917433,mmlu_poly_pt,0.45785876993166286,0.3058823529411765,0.58,0.42424242424242425,0.3875,0.5,0.375,0.475,0.4375,0.45,0.33766233766233766,0.45,0.525,0.5,0.425,0.5375,0.5,0.425,0.5512820512820513,0.425,0.47435897435897434,0.4875,0.475,0.425,0.55,0.4,0.425,0.5,0.00613818422725749,0.028843178399003817,0.028497978226784737,0.028603550827994963,0.03144756381638736,0.032281358229001994,0.03126616006591764,0.03224559143592351,0.03220218907884271,0.03204757164218827,0.03114377327487417,0.032306884087283806,0.03217311717354094,0.032159253430013135,0.031867381207903646,0.032196277901620635,0.03259461806023854,0.03197515786061687,0.03254023200947126,0.03182556288743423,0.03256410582927178,0.032277129335240407,0.03226398230838134,0.03190235610798057,0.03211474140760389,0.031583902694465565,0.03199397783880584,0.03213598807617757,oab_exams,0.6568742655699178,0.011485521901896055,portuguese_hate_speech_binary,0.6348498207253277,0.011929457042377096,0.7149253731343284,0.00714538842384441,tweetsentbr,0.6760920733569945,0.007705257299978287
13
+ Qwen3-4B-Base,0.5205128205128206,0.5452991452991452,0.014563744061566674,0.014611572578037524,arc_challenge_poly_pt,0.9301470588235294,0.003640592745134969,assin2_rte,0.9300814303602809,0.0036448029497531905,assin2_sts,0.4649550653594771,N/A,0.8163337305945799,0.00580871453149834,0.666,0.007458210658499574,assin_entailment,0.71375,0.007147762377258385,assin_paraphrase,0.8788888888888889,0.8788888888888889,0.010881258692882334,0.010881258692882334,belebele_por_Latn,0.6995827538247567,0.7037037037037037,0.68,0.6363636363636364,0.6739130434782609,0.6862745098039216,0.6923076923076923,0.7674418604651163,0.6222222222222222,0.6851851851851852,0.675,0.6785714285714286,0.7115384615384616,0.7551020408163265,0.7954545454545454,0.7560975609756098,0.00987599103006625,0.035778507134707614,0.03830417740221117,0.037306683346400936,0.03989883029812014,0.037540507970329595,0.04273667972852147,0.0371941393841637,0.041806722382516266,0.036401064701860976,0.04277647285391648,0.036044705437926,0.036279031701994716,0.03547498421763062,0.03517980058995743,0.038703047333597515,bluex,0.5794797687861272,0.010836861821872754,calame_pt,6.850406364902601,0.37749463083543355,0.7760671798460462,0.7478260869565218,0.7863247863247863,0.8461538461538461,0.8189655172413793,0.7314814814814815,0.7522935779816514,0.7647058823529411,0.7355371900826446,0.7804878048780488,0.7758620689655172,0.7969924812030075,0.7703703703703704,0.006385535606705982,0.023380316711303327,0.021860040682351863,0.01916577353184397,0.020734765688638912,0.024643765540019624,0.02386816916174888,0.022476114893258195,0.02306001318069786,0.021626260503878513,0.02228539366001018,0.02019148947196448,0.020946902770892308,enem,0.8353846153846154,0.010287216042154528,faquad_nli,0.6628287480305417,0.017895588950111792,0.76,0.77,0.042295258468165065,0.77,0.042295258468165065,0.04292346959909278,global_piqa_completions_por_latn_braz,0.7992857142857143,0.007560193223282475,hatebr_offensive_binary,0.7928373541528106,0.007772855813762827,0.46158847112363205,0.6319211182143244,0.005020512956204801,0.00518956384203374,hellaswag_poly_pt,0.6037259848631865,0.006814434238262858,lambada_poly_pt,7.583741142779511,0.24176137158913064,0.6859051335935155,0.004021253314376942,mmlu_poly_pt,0.5558086560364465,0.4588235294117647,0.54,0.5252525252525253,0.525,0.6,0.5125,0.6625,0.5,0.4875,0.4935064935064935,0.5625,0.525,0.675,0.5,0.6625,0.6410256410256411,0.5,0.717948717948718,0.5875,0.5384615384615384,0.6375,0.525,0.525,0.5875,0.5125,0.55,0.475,0.006127218036974305,0.031198197104158432,0.028656685539031335,0.028926788825821715,0.032305147348093755,0.0314776521000435,0.03224512117668164,0.03047345294551832,0.032309834201479974,0.03228452872706116,0.032868774205327654,0.03198991204340402,0.03228440696429048,0.030260493341651394,0.03242290492979508,0.030694146857333706,0.03145873209101905,0.0323148427623094,0.02943595665639766,0.03179334906945783,0.03264332278766294,0.03094860353483003,0.032258020754864246,0.032170680018503144,0.03173493548242147,0.0322803705254934,0.0321089376330344,0.03222483748707163,oab_exams,0.6298472385428907,0.011732397046791484,portuguese_hate_speech_binary,0.6244520500926733,0.011826752940830283,0.7268656716417911,0.007043524266760944,tweetsentbr,0.6800118921455741,0.007662077752331052
14
+ Qwen3-8B-Base,0.5777777777777777,0.6076923076923076,0.014280651660888172,0.014445870094078068,arc_challenge_poly_pt,0.9436274509803921,0.003293486114139192,assin2_rte,0.9436123959791051,0.003294993753676702,assin2_sts,0.37985702614379085,N/A,0.8289621473775062,0.005666658970954797,0.681,0.00737043627153157,assin_entailment,0.6755,0.007403623498681409,assin_paraphrase,0.8955555555555555,0.8955555555555555,0.010200209515456861,0.010200209515456861,belebele_por_Latn,0.713490959666203,0.7222222222222222,0.66,0.6909090909090909,0.6304347826086957,0.6862745098039216,0.6923076923076923,0.6744186046511628,0.7555555555555555,0.5925925925925926,0.725,0.7142857142857143,0.7884615384615384,0.7346938775510204,0.8409090909090909,0.8292682926829268,0.009750845418246586,0.035120719300628205,0.038843699205529846,0.03584037127491545,0.04106830091429943,0.03746118505272808,0.04257962999017108,0.04126372987503008,0.03708509316749288,0.03853845230196164,0.04083279442974535,0.034699126177332555,0.03272115517531264,0.03646190745970515,0.03191664107218092,0.03387673419138132,bluex,0.558766859344894,0.010900347208855708,calame_pt,6.466359408942032,0.3466622263157279,0.7977606717984604,0.8,0.811965811965812,0.8803418803418803,0.8706896551724138,0.8055555555555556,0.7889908256880734,0.7983193277310925,0.7520661157024794,0.7886178861788617,0.7931034482758621,0.706766917293233,0.7925925925925926,0.006161019535030461,0.021528336715752047,0.020845928956570253,0.01733498713187168,0.017922527056035895,0.02202774397227165,0.022590071383569513,0.021252821555969748,0.022533457909028817,0.0212815344612527,0.021738372994113467,0.02272474487474605,0.020181350422757136,enem,0.8938461538461538,0.008538924014622657,faquad_nli,0.8264822516684398,0.013616260573397451,0.8,0.78,0.041633319989322654,0.78,0.041633319989322654,0.04020151261036849,global_piqa_completions_por_latn_braz,0.7557142857142857,0.008118347610974668,hatebr_offensive_binary,0.7411578483016321,0.00850887247946856,0.4996207606457904,0.6814389424639723,0.004850159531532225,0.005204944301922899,hellaswag_poly_pt,0.5916941587424801,0.006847838514732699,lambada_poly_pt,6.396338923336681,0.1863655739455019,0.7165265685980187,0.003904552941174207,mmlu_poly_pt,0.5749430523917995,0.5176470588235295,0.59,0.4444444444444444,0.55,0.55,0.5875,0.625,0.575,0.575,0.5324675324675324,0.5375,0.5625,0.675,0.55,0.6375,0.6282051282051282,0.475,0.6794871794871795,0.5375,0.5256410256410257,0.55,0.5875,0.575,0.6625,0.5625,0.6875,0.575,0.00609642254740074,0.03128080035407769,0.02844123015811447,0.02881402070182747,0.03216610182334634,0.03209013967175254,0.03173422195267694,0.0313012347924546,0.031887072844251976,0.031865797838096364,0.032799055856276155,0.03219520051277247,0.03211581313089161,0.030217656620056536,0.03206107381944875,0.031095543695448227,0.031613688447698844,0.032195292760102204,0.030456830764819087,0.03210994535356873,0.032666626828524226,0.032079094610487316,0.03180068694144846,0.03189687257289009,0.03054185670097376,0.03203201891555038,0.029948526094723246,0.03192725677088334,oab_exams,0.7379553466509988,0.01066781737292724,portuguese_hate_speech_binary,0.7086247372566601,0.011573601869518073,0.753731343283582,0.006805849274708307,tweetsentbr,0.7196722822375756,0.007340906455605638
15
+ TeenyTinyLlama-160m,0.19829059829059828,0.24017094017094018,0.0124942710453615,0.011661443012747946,arc_challenge_poly_pt,0.5800653594771242,0.007022804722718477,assin2_rte,0.5347494401931001,0.007213748340880085,assin2_sts,2.7246813725490204,N/A,0.003318306693038656,0.008598896250653267,0.49325,0.007905962018240315,assin_entailment,0.62175,0.007668698495126294,assin_paraphrase,0.2677777777777778,0.2677777777777778,0.014768244481214481,0.014768244481214481,belebele_por_Latn,0.22531293463143254,0.24074074074074073,0.22,0.3090909090909091,0.32608695652173914,0.19607843137254902,0.1794871794871795,0.18604651162790697,0.26666666666666666,0.2037037037037037,0.225,0.21428571428571427,0.1346153846153846,0.2653061224489796,0.2727272727272727,0.12195121951219512,0.008984366737824847,0.03370733106880584,0.03389086722142001,0.03605649291955817,0.03997602093846583,0.032191093748247884,0.035416863774369305,0.034348501842554384,0.0378328863723703,0.03176098166714969,0.03812533904864976,0.031583325152237954,0.0272944923482609,0.036285745982029896,0.03874940132780957,0.029585965691276164,bluex,0.3978805394990366,0.010745054694103391,calame_pt,25.811401954273116,1.79087935238306,0.18894331700489853,0.20869565217391303,0.21367521367521367,0.24786324786324787,0.1810344827586207,0.16666666666666666,0.12844036697247707,0.226890756302521,0.17355371900826447,0.21138211382113822,0.16379310344827586,0.18796992481203006,0.15555555555555556,0.005986953126788941,0.021840693304140117,0.021840740788773776,0.022978991954041482,0.020668726133380073,0.02068451806634303,0.01851025833246064,0.022131578483793702,0.01983984871785401,0.021156735733243007,0.019840875177189073,0.019534648514636748,0.018035734487842733,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.59,0.57,0.049756985195624305,0.58,0.04960449637488582,0.04943110704237104,global_piqa_completions_por_latn_braz,0.5092857142857142,0.009477572820066811,hatebr_offensive_binary,0.3728445198230819,0.006927590081721923,0.2768447285729765,0.29894896521833353,0.004765621557686009,0.004657792255886432,hellaswag_poly_pt,0.17737240442460703,0.005321777787774099,lambada_poly_pt,195.8573020160999,8.420107161852947,0.25743020114079856,0.003787890143299065,mmlu_poly_pt,0.22323462414578588,0.24705882352941178,0.19,0.2828282828282828,0.25,0.2125,0.2375,0.225,0.175,0.2,0.2077922077922078,0.175,0.1625,0.175,0.275,0.25,0.24358974358974358,0.2125,0.24358974358974358,0.25,0.23076923076923078,0.25,0.2375,0.2375,0.25,0.2,0.1625,0.2375,0.005127041237604632,0.027042262913908738,0.022610898558232256,0.02612280528824573,0.027906432539943866,0.026254156596042817,0.02748025365254729,0.026968308403730747,0.024453251929326673,0.025728171531313702,0.026767452136385567,0.024450834795221654,0.02379321281672535,0.024500457146565055,0.028754788100139816,0.027845276878773455,0.028006227266103845,0.026347203745785593,0.028014432052580935,0.027932959678206467,0.027589385747782705,0.027896028383565717,0.027506939315053488,0.027528533271181592,0.027914660082074307,0.025795907677240118,0.023840298088979372,0.027546723108028172,oab_exams,0.6568742655699178,0.011474018662538724,portuguese_hate_speech_binary,0.4241472005932518,0.008233882524185767,0.29203980099502486,0.007178909829242862,tweetsentbr,0.1518354122953803,0.003021947881823276
16
+ TeenyTinyLlama-460m,0.2282051282051282,0.27350427350427353,0.01303741033837626,0.012274572870357178,arc_challenge_poly_pt,0.6066176470588235,0.006967267659350775,assin2_rte,0.5373876424062898,0.0072909925117069325,assin2_sts,1.7032271241830066,N/A,0.1155141518202612,0.0145274851476106,0.62625,0.007650481477770561,assin_entailment,0.583,0.007796983277977404,assin_paraphrase,0.2811111111111111,0.2811111111111111,0.014993051943690223,0.014993051943690223,belebele_por_Latn,0.25869262865090403,0.2222222222222222,0.26,0.2909090909090909,0.30434782608695654,0.27450980392156865,0.3333333333333333,0.2558139534883721,0.4,0.18518518518518517,0.2,0.25,0.19230769230769232,0.24489795918367346,0.29545454545454547,0.1951219512195122,0.009443470142086071,0.032686823807106226,0.03588778454840895,0.03534696556525206,0.039083609550802104,0.035978720494247995,0.04367658685960787,0.03838360363384919,0.042273908274461,0.03057948748749918,0.0365322052413754,0.03341142258743985,0.031473789662835525,0.03547239664590251,0.03968072648772813,0.03565292949195198,bluex,0.42485549132947975,0.010851756902154302,calame_pt,16.894236491724712,1.0653800314962658,0.20153953813855843,0.26956521739130435,0.1794871794871795,0.17094017094017094,0.1810344827586207,0.1574074074074074,0.27522935779816515,0.21008403361344538,0.18181818181818182,0.16260162601626016,0.21551724137931033,0.18045112781954886,0.23703703703703705,0.006120374796135436,0.02394669490793456,0.020513771570222935,0.020146273865937696,0.02069031074680812,0.020315152240674064,0.024695300517388545,0.021574718048588012,0.020219473413749953,0.019087382840913217,0.021985358470882337,0.019229278363485952,0.021137632687647255,enem,0.7292307692307692,0.012297143166681311,faquad_nli,0.4622442839951865,0.01125568350129279,0.59,0.6,0.0492365963917331,0.59,0.04943110704237104,0.04943110704237104,global_piqa_completions_por_latn_braz,0.5007142857142857,0.009478831508132182,hatebr_offensive_binary,0.3374295434926116,0.004633493924907901,0.30404160797486185,0.3481417271643732,0.0049590768399805,0.00478855362695265,hellaswag_poly_pt,0.21560256161459343,0.005729373228646617,lambada_poly_pt,84.67093520631353,3.3468370418222366,0.26651155809066346,0.00383048395542772,mmlu_poly_pt,0.27015945330296126,0.23529411764705882,0.25,0.25252525252525254,0.2625,0.25,0.2375,0.25,0.325,0.325,0.3116883116883117,0.3125,0.3375,0.2875,0.25,0.3125,0.34615384615384615,0.2125,0.24358974358974358,0.2,0.24358974358974358,0.2375,0.25,0.3125,0.25,0.25,0.275,0.2875,0.0054609991139087195,0.02662760399525724,0.025044458442251318,0.02517984462055334,0.028487240212836117,0.027880232512176514,0.027469022000539828,0.027921672471627964,0.03022483875633216,0.03021485499773824,0.030481562874179448,0.029820761573409536,0.030563917132574843,0.029258325248078573,0.02798578403862147,0.029880203667916966,0.031035358629157586,0.02648333884237542,0.02805128221432659,0.025728334229440937,0.028091512596453462,0.02736879425512706,0.02790877447640268,0.029883741509311442,0.027933546443914634,0.027943914553281646,0.028829140552916924,0.029338075163090605,oab_exams,0.2984723854289072,0.011059935171647581,portuguese_hate_speech_binary,0.22986425339366515,0.006562248201388681,0.29850746268656714,0.007220602019931032,tweetsentbr,0.17221375341280776,0.004489438407926224
17
+ Tucano-160m,0.20512820512820512,0.25555555555555554,0.01275708808764098,0.01181011095534929,arc_challenge_poly_pt,0.5004084967320261,0.007138907308242692,assin2_rte,0.33424046164366045,0.003238145692219769,assin2_sts,1.0325898692810456,N/A,0.03738144071785769,0.01248498955038447,0.62975,0.007635824174750682,assin_entailment,0.66175,0.007481525758955888,assin_paraphrase,0.23444444444444446,0.23444444444444446,0.014129554968110759,0.014129554968110759,belebele_por_Latn,0.24756606397774686,0.2222222222222222,0.26,0.2909090909090909,0.32608695652173914,0.27450980392156865,0.28205128205128205,0.2558139534883721,0.35555555555555557,0.18518518518518517,0.2,0.23214285714285715,0.19230769230769232,0.22448979591836735,0.29545454545454547,0.12195121951219512,0.009308601345071357,0.03258446080825427,0.03585259404178441,0.03534696556525206,0.03980802584562823,0.03600293060894696,0.04167152779428253,0.038398360798650645,0.041130295896316844,0.030544103079447143,0.0365322052413754,0.03263404400674921,0.031473789662835525,0.034397600766991714,0.03968072648772813,0.02935580284027595,bluex,0.4359344894026975,0.010885950055154976,calame_pt,16.453354676204906,1.0265451479004268,0.20573827851644508,0.20869565217391303,0.17094017094017094,0.18803418803418803,0.1206896551724138,0.25925925925925924,0.24770642201834864,0.24369747899159663,0.2231404958677686,0.21951219512195122,0.22413793103448276,0.17293233082706766,0.2,0.006183493299173988,0.021880250883568254,0.020054974296173193,0.020809668556382725,0.017409145665642917,0.024317821210400686,0.023936944609567287,0.022753385474322096,0.021828798090394206,0.021491797798553338,0.022377612796663126,0.018902880821541908,0.019987281326313784,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.61,0.6,0.0492365963917331,0.59,0.04943110704237104,0.04902071300001973,global_piqa_completions_por_latn_braz,0.5135714285714286,0.009475463608662811,hatebr_offensive_binary,0.37619041699190825,0.006990825769414682,0.2969985913966844,0.33730631704410013,0.004921696847832917,0.0047566533161401275,hellaswag_poly_pt,0.21637880846109062,0.005736837143370836,lambada_poly_pt,100.04408435759302,4.017047194910349,0.25773041128790153,0.0037893319638641954,mmlu_poly_pt,0.17220956719817768,0.058823529411764705,0.08,0.15151515151515152,0.1125,0.1,0.1375,0.075,0.1875,0.2,0.2077922077922078,0.1625,0.25,0.1625,0.175,0.2375,0.19230769230769232,0.1625,0.16666666666666666,0.2,0.19230769230769232,0.2,0.2125,0.2125,0.1375,0.2375,0.2125,0.2625,0.0046481481234839795,0.014701158124870163,0.01571678703480073,0.02081384191094631,0.020368372891685624,0.019334640417162624,0.022197338281285513,0.01705162123869162,0.025150194064610026,0.025818053880754605,0.026635228844908092,0.023783887366137926,0.027954863257438074,0.02381155795249743,0.02454267334118202,0.027364143733242467,0.025678523577336784,0.02388500120718443,0.024386758429616694,0.02561692945294449,0.025741860937932584,0.025797450459718978,0.026395929595712773,0.026403354771226538,0.022257469307837546,0.027476108486177493,0.026363726494584048,0.028515556389380706,oab_exams,0.299647473560517,0.011071844042014278,portuguese_hate_speech_binary,0.23174470457079152,0.006694730628151124,0.2990049751243781,0.007220024240766718,tweetsentbr,0.1649293949213547,0.0037350874409846696
18
+ Tucano-1b1,0.2512820512820513,0.30085470085470084,0.01341389388061822,0.012686230587653974,arc_challenge_poly_pt,0.6486928104575164,0.006787013650230902,assin2_rte,0.6135725377473816,0.007144240512923472,assin2_sts,2.4035539215686272,N/A,0.14012459337602956,0.012748716626770866,0.61525,0.007693773970846728,assin_entailment,0.64025,0.007589261070479719,assin_paraphrase,0.25666666666666665,0.25666666666666665,0.014567891342380044,0.014567891342380044,belebele_por_Latn,0.2545201668984701,0.2222222222222222,0.26,0.3090909090909091,0.34782608695652173,0.3137254901960784,0.28205128205128205,0.23255813953488372,0.3333333333333333,0.18518518518518517,0.2,0.26785714285714285,0.19230769230769232,0.20408163265306123,0.3181818181818182,0.14634146341463414,0.00940262372092274,0.03263414933505183,0.03585259404178441,0.035964950486808686,0.04046682629633164,0.03741527482650915,0.04167152779428253,0.03716515848327099,0.04057542870297923,0.030596318881897913,0.0365322052413754,0.03422797593018743,0.031473789662835525,0.033271897387818494,0.040524370358501566,0.03169189946416069,bluex,0.48940269749518306,0.010973960354844419,calame_pt,10.545774603489813,0.6053107430127831,0.21553533939818054,0.2,0.19658119658119658,0.21367521367521367,0.1896551724137931,0.23148148148148148,0.1834862385321101,0.2605042016806723,0.23140495867768596,0.21138211382113822,0.2413793103448276,0.19548872180451127,0.22962962962962963,0.006278383435271929,0.021516686955447135,0.02116920025706272,0.02183405335297127,0.021021605266427976,0.023408389409943914,0.02146059677794695,0.02327270480574693,0.022080800125715935,0.021192450512050303,0.022942503261581686,0.019811040539733787,0.020979563165554115,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.77,0.67,0.04725815626252609,0.68,0.046882617226215076,0.042295258468165065,global_piqa_completions_por_latn_braz,0.4042857142857143,0.009292935956448629,hatebr_offensive_binary,0.28887437998811133,0.0048086050503006105,0.3539928486293206,0.4410011918951132,0.005168583414494963,0.004978082780068716,hellaswag_poly_pt,0.2843004075295944,0.006284434424367963,lambada_poly_pt,33.61180584452193,1.1407356100045754,0.2525517862503753,0.003764131286734753,mmlu_poly_pt,0.26378132118451025,0.23529411764705882,0.25,0.25252525252525254,0.2625,0.25,0.2375,0.25,0.3125,0.325,0.2987012987012987,0.3125,0.325,0.275,0.25,0.3125,0.3333333333333333,0.2125,0.24358974358974358,0.2,0.24358974358974358,0.225,0.225,0.2875,0.2375,0.225,0.275,0.275,0.005422667066381477,0.02662760399525724,0.025044458442251318,0.02517984462055334,0.028487240212836117,0.027880232512176514,0.027469022000539828,0.027921672471627964,0.029891600574479746,0.03021485499773824,0.030147139671852726,0.029820761573409536,0.03030808698061671,0.028841270350129174,0.02798578403862147,0.029880203667916966,0.030764130576165345,0.02648333884237542,0.02805128221432659,0.025728334229440937,0.028091512596453462,0.02686296748328316,0.026968247099868357,0.029172210122808108,0.02747247343010752,0.02693929528948014,0.028829140552916924,0.028925570890050427,oab_exams,0.700352526439483,0.01107314367626205,portuguese_hate_speech_binary,0.4118866620594333,0.003830370476228163,0.42885572139303485,0.007791951143078489,tweetsentbr,0.32433768451200706,0.005274199712998019
19
+ Tucano-2b4,0.2658119658119658,0.30427350427350425,0.013456870841978025,0.012920629079540702,arc_challenge_poly_pt,0.5833333333333334,0.0070489866977433,assin2_rte,0.5662370312001651,0.007149037300176883,assin2_sts,1.0109885620915033,N/A,0.0,0.0,0.62475,0.007656632117246114,assin_entailment,0.649,0.00754744930408608,assin_paraphrase,0.2588888888888889,0.2588888888888889,0.014608933836168536,0.014608933836168536,belebele_por_Latn,0.2545201668984701,0.2222222222222222,0.26,0.2909090909090909,0.32608695652173914,0.29411764705882354,0.28205128205128205,0.2558139534883721,0.3333333333333333,0.2222222222222222,0.225,0.25,0.19230769230769232,0.20408163265306123,0.29545454545454547,0.17073170731707318,0.009390824006099257,0.032595860426050755,0.03585259404178441,0.03534696556525206,0.03980802584562823,0.0367251916768685,0.04167152779428253,0.038391548705677424,0.04057542870297923,0.03271675745521854,0.038182925807022976,0.033436522944707954,0.031473789662835525,0.033271897387818494,0.03968072648772813,0.03378335774801119,bluex,0.5033718689788054,0.010976176403171427,calame_pt,9.49209259932395,0.5263061269877062,0.21623512946116166,0.20869565217391303,0.23931623931623933,0.20512820512820512,0.14655172413793102,0.2037037037037037,0.23853211009174313,0.2773109243697479,0.24793388429752067,0.17886178861788618,0.21551724137931033,0.18796992481203006,0.24444444444444444,0.006277904370218341,0.021765728616016465,0.02272041965205983,0.021529317313180734,0.018938667296315048,0.02237966187773931,0.02368841663983037,0.023752890984876773,0.02261999938726175,0.019911809583291265,0.022065788087919966,0.019539265289673084,0.02139163854265848,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.79,0.73,0.04461960433384737,0.73,0.04461960433384737,0.040936018074033236,global_piqa_completions_por_latn_braz,0.3457142857142857,0.009009695782470156,hatebr_offensive_binary,0.2930836414791883,0.007346065796046769,0.3766388557806913,0.4884602882219092,0.005203559381780298,0.005044041278203687,hellaswag_poly_pt,0.3238889967009509,0.006519573290659083,lambada_poly_pt,24.273200696658762,0.7866069863200691,0.2623836685679976,0.003811383485943047,mmlu_poly_pt,0.2674259681093394,0.23529411764705882,0.25,0.25252525252525254,0.2625,0.25,0.2375,0.25,0.325,0.3375,0.2987012987012987,0.3125,0.325,0.275,0.25,0.3125,0.3333333333333333,0.2125,0.24358974358974358,0.2125,0.2564102564102564,0.2375,0.2375,0.2875,0.25,0.225,0.2875,0.275,0.005445985841714603,0.02662760399525724,0.025044458442251318,0.02517984462055334,0.028487240212836117,0.027880232512176514,0.027469022000539828,0.027921672471627964,0.03022483875633216,0.030543984411529904,0.030147139671852726,0.029820761573409536,0.03030808698061671,0.028841270350129174,0.02798578403862147,0.029880203667916966,0.030764130576165345,0.02648333884237542,0.02805128221432659,0.026296568931966004,0.028575147777311117,0.027367690363679913,0.027441340485316153,0.029172210122808108,0.027940352218918003,0.02693929528948014,0.02920132860646106,0.028925570890050427,oab_exams,0.6933019976498237,0.011150129600038914,portuguese_hate_speech_binary,0.42029518850565717,0.006199117983084853,0.6109452736318408,0.0077075449797958705,tweetsentbr,0.5713014408585765,0.007754415889438553
20
+ Tucano-630m,0.2504273504273504,0.28717948717948716,0.013233037605915245,0.012671863600300018,arc_challenge_poly_pt,0.6025326797385621,0.006977630012943611,assin2_rte,0.5816446664131609,0.0071347908001721735,assin2_sts,2.495400326797386,N/A,0.10043729420444052,0.013734856379037043,0.666,0.007458210658499574,assin_entailment,0.6405,0.007588104660129779,assin_paraphrase,0.2733333333333333,0.2733333333333333,0.014863944409417502,0.014863944409417502,belebele_por_Latn,0.2670375521557719,0.25925925925925924,0.26,0.3090909090909091,0.32608695652173914,0.29411764705882354,0.3076923076923077,0.2558139534883721,0.37777777777777777,0.2222222222222222,0.225,0.25,0.19230769230769232,0.22448979591836735,0.3181818181818182,0.1951219512195122,0.009528424093467875,0.03437527707159389,0.03585259404178441,0.036002910832560193,0.03980802584562823,0.03678406842195817,0.04283183998954796,0.038398360798650645,0.04167427031075236,0.03271675745521854,0.038182925807022976,0.03341142258743985,0.031473789662835525,0.03446216033778414,0.040524370358501566,0.03556191511650078,bluex,0.4730250481695568,0.01096044040918296,calame_pt,11.615778427996,0.6631341424368952,0.21693491952414276,0.19130434782608696,0.20512820512820512,0.20512820512820512,0.15517241379310345,0.23148148148148148,0.2018348623853211,0.29411764705882354,0.256198347107438,0.21138211382113822,0.21551724137931033,0.21052631578947367,0.2222222222222222,0.006292669545184319,0.021164556524966373,0.021507384283901695,0.02152811625920136,0.019394504622995114,0.023415255201445617,0.02222243965433611,0.024130828527305574,0.022846894067722442,0.021198750818659993,0.02201297352125232,0.020357617909243248,0.02071694196668295,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.71,0.67,0.04725815626252609,0.68,0.046882617226215076,0.045604802157206865,global_piqa_completions_por_latn_braz,0.5685714285714286,0.009367575808491036,hatebr_offensive_binary,0.5497826647158669,0.009491582018091594,0.3340556940080182,0.4037273810813739,0.005107551363682595,0.004909922096356381,hellaswag_poly_pt,0.2619833106928003,0.00612607031482513,lambada_poly_pt,48.0947065279568,1.7580556076650538,0.25600420294205944,0.003781009554619628,mmlu_poly_pt,0.269248291571754,0.23529411764705882,0.25,0.25252525252525254,0.2625,0.25,0.25,0.25,0.325,0.325,0.3116883116883117,0.3125,0.325,0.2875,0.2375,0.3125,0.34615384615384615,0.2125,0.24358974358974358,0.2,0.2692307692307692,0.2375,0.2375,0.3,0.2625,0.2375,0.275,0.275,0.00545583795277233,0.02662760399525724,0.025044458442251318,0.02517984462055334,0.028487240212836117,0.027880232512176514,0.027949660198192516,0.027921672471627964,0.03022483875633216,0.03021485499773824,0.030481562874179448,0.029820761573409536,0.03030808698061671,0.029258325248078573,0.027493225333866663,0.029880203667916966,0.031035358629157586,0.02648333884237542,0.02805128221432659,0.025728334229440937,0.02906186881960052,0.02736879425512706,0.02745690409624145,0.029539631783275743,0.02838086518015215,0.027465636498159574,0.028829140552916924,0.028925570890050427,oab_exams,0.3396004700352526,0.011441579379090646,portuguese_hate_speech_binary,0.3131498523650318,0.010561070965318428,0.45124378109452734,0.007861876742013206,tweetsentbr,0.2072905953605302,0.0024889383614547437
21
+ granite-3.3-2b-base,0.37264957264957266,0.41367521367521365,0.014404282204332606,0.014141587247061934,arc_challenge_poly_pt,0.8055555555555556,0.005664611020623134,assin2_rte,0.8023865940463358,0.005744998020558099,assin2_sts,1.452312091503268,N/A,0.5283561147031579,0.011861133126886747,0.68175,0.0073658195877682135,assin_entailment,0.675,0.0074065825024832825,assin_paraphrase,0.6566666666666666,0.6566666666666666,0.01583618712450829,0.01583618712450829,belebele_por_Latn,0.4534075104311544,0.42592592592592593,0.34,0.41818181818181815,0.5217391304347826,0.45098039215686275,0.46153846153846156,0.5581395348837209,0.5333333333333333,0.3888888888888889,0.45,0.48214285714285715,0.4423076923076923,0.4489795918367347,0.45454545454545453,0.4634146341463415,0.010737376082624565,0.03894139481100792,0.03869808895773653,0.03834273624139255,0.04260722039744891,0.04022732342855381,0.04615567132594318,0.043711369449436654,0.0429885246542778,0.03842975687235822,0.04546679868335908,0.03860350419428315,0.039700076052028105,0.04108542388416878,0.04333929119847271,0.04491732645335962,bluex,0.5876685934489403,0.010806384050731128,calame_pt,6.871733571219698,0.3863418590645055,0.5402379286214136,0.4956521739130435,0.5128205128205128,0.6324786324786325,0.5689655172413793,0.5185185185185185,0.5412844036697247,0.5294117647058824,0.5454545454545454,0.5447154471544715,0.5603448275862069,0.5037593984962406,0.5333333333333333,0.007610905607743362,0.02690801312792865,0.026699941842148155,0.02568371510524713,0.02646837875883036,0.027758164764029935,0.02758977152907364,0.02636805588073962,0.026123244122537664,0.025928314867697432,0.026627823363633386,0.02507626391778475,0.024800081646743105,enem,0.7846153846153846,0.011396120309131366,faquad_nli,0.4396551724137931,0.00357969847290883,0.75,0.72,0.045126085985421296,0.7,0.04605661864718383,0.04351941398892446,global_piqa_completions_por_latn_braz,0.695,0.00871012498492208,hatebr_offensive_binary,0.6656178769699767,0.00927726743886742,0.44988622819373714,0.6080832159497237,0.005081882819370185,0.005178736512257462,hellaswag_poly_pt,0.5821851348728896,0.006871231372591393,lambada_poly_pt,8.394705035640424,0.2633573955358196,0.45631942359651756,0.004315244150188743,mmlu_poly_pt,0.3954441913439636,0.36470588235294116,0.38,0.3434343434343434,0.3625,0.4375,0.3875,0.3875,0.375,0.4375,0.3246753246753247,0.4,0.4375,0.425,0.325,0.425,0.44871794871794873,0.3125,0.5128205128205128,0.4,0.46153846153846156,0.4375,0.35,0.4,0.475,0.4,0.3625,0.325,0.006011459864146542,0.030131183514046884,0.027995185108802462,0.02751820105827811,0.031148189897444594,0.03197249501242128,0.031433326474546254,0.031420822379549185,0.031261737062560024,0.032073596025625116,0.030754033676943116,0.031575594902594754,0.03207544651983603,0.031872359062171715,0.03015613631734852,0.031922162437299985,0.032470247502144084,0.029915793897993842,0.03265504892158722,0.03170056859861937,0.032660523223876706,0.03214541629196053,0.03080539770754021,0.03162400672474354,0.03216034101703433,0.031662693844808576,0.030995965897894905,0.030240576881592363,oab_exams,0.6404230317273796,0.011641719509380073,portuguese_hate_speech_binary,0.6263773788480522,0.01190534003929822,0.7278606965174129,0.007022236403107582,tweetsentbr,0.6750132403173793,0.007691318972392456
22
+ granite-3.3-8b-base,0.44700854700854703,0.5008547008547009,0.014623863148445866,0.014541522092430072,arc_challenge_poly_pt,0.9158496732026143,0.0039745872555855405,assin2_rte,0.9158469208551745,0.003975438929837884,assin2_sts,0.6269035947712418,N/A,0.7685908309717481,0.006273380002195156,0.71,0.007175504997419485,assin_entailment,0.7115,0.007164479739833565,assin_paraphrase,0.8133333333333334,0.8133333333333334,0.012995345372845437,0.012995345372845437,belebele_por_Latn,0.5605006954102921,0.5555555555555556,0.56,0.5272727272727272,0.5,0.5490196078431373,0.6923076923076923,0.6511627906976745,0.6,0.37037037037037035,0.5,0.5357142857142857,0.5769230769230769,0.5918367346938775,0.6136363636363636,0.6585365853658537,0.010664614338865541,0.038981917336824115,0.04051152833255342,0.038892842667511966,0.04265701817144015,0.04024832971347391,0.04263159098365453,0.041929360059079095,0.0421027367622441,0.037732985324998844,0.04562020328295444,0.03847519665482412,0.03958276365871327,0.04059763305577338,0.04220164692869368,0.04269031639853999,bluex,0.6083815028901735,0.0107154533118165,calame_pt,5.7874671110198985,0.3022809483054912,0.6501049685094472,0.6086956521739131,0.6581196581196581,0.7948717948717948,0.6379310344827587,0.6388888888888888,0.6788990825688074,0.6470588235294118,0.6198347107438017,0.6260162601626016,0.5948275862068966,0.6390977443609023,0.6592592592592592,0.007325009042000765,0.026281412322788557,0.025246133281977116,0.02158575253188919,0.02585429047398459,0.026738307855663536,0.025844532726528244,0.025207824009913667,0.025446510992896194,0.025193542252232266,0.026248322844131276,0.024102627788501176,0.02354401776958053,enem,0.8461538461538461,0.010016252837382882,faquad_nli,0.7264309764309764,0.01640001077951372,0.84,0.81,0.039427724440366255,0.81,0.039427724440366255,0.03684529491774706,global_piqa_completions_por_latn_braz,0.7192857142857143,0.0085040787595509,hatebr_offensive_binary,0.6978895246478873,0.008983825940093065,0.5131650232961318,0.6958500379239354,0.004789033974315843,0.0052031412663780365,hellaswag_poly_pt,0.6534057830390064,0.006630010494388711,lambada_poly_pt,5.644180975257726,0.15705029308642518,0.5453317322125488,0.004313965755196941,mmlu_poly_pt,0.4610478359908884,0.3176470588235294,0.46,0.32323232323232326,0.4125,0.5,0.425,0.5,0.625,0.4125,0.4025974025974026,0.4625,0.4875,0.5875,0.4125,0.475,0.5641025641025641,0.425,0.5384615384615384,0.45,0.5128205128205128,0.4875,0.4,0.4125,0.5125,0.5125,0.4125,0.4625,0.006139266785793201,0.02916637349408555,0.028644580323293214,0.027053810739649554,0.03177760017836827,0.03230428206694653,0.031867759976852796,0.03225163605647273,0.031155122396704982,0.03181927495664781,0.03219039094333511,0.032228211307856025,0.032255549820116185,0.03173916774964969,0.031825649528523174,0.03229273305042174,0.03251872992928078,0.031890993362460617,0.0325693932577312,0.032117157984484154,0.0326919032572382,0.032212904551237334,0.03153907674098193,0.031756210319758844,0.03232148142256666,0.03231819881478497,0.03172129909532893,0.03226182772554558,oab_exams,0.654524089306698,0.011566488765228757,portuguese_hate_speech_binary,0.645332221957609,0.011757266261496169,0.754726368159204,0.006777343768244607,tweetsentbr,0.7379579758108546,0.007141035804791783