{ "Mistral-Nemo-Instruct-2407-NVFP4": { "coqa": { "alias": "coqa", "em,none": 0.5391666666666667, "em_stderr,none": 0.019942406218865946, "f1,none": 0.7182411035342305, "f1_stderr,none": 0.015116252267763605 }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.6186018721370244, "acc_stderr,none": 0.004847372670134442, "acc_norm,none": 0.8084047002589125, "acc_norm_stderr,none": 0.003927519561403822 }, "ifeval": { "alias": "ifeval", "prompt_level_strict_acc,none": 0.3807763401109057, "prompt_level_strict_acc_stderr,none": 0.020895937888190826, "inst_level_strict_acc,none": 0.4712230215827338, "inst_level_strict_acc_stderr,none": "N/A", "prompt_level_loose_acc,none": 0.4602587800369686, "prompt_level_loose_acc_stderr,none": 0.021448501434135032, "inst_level_loose_acc,none": 0.5455635491606715, "inst_level_loose_acc_stderr,none": "N/A" }, "lambada_openai": { "alias": "lambada_openai", "perplexity,none": 3.0228658176330048, "perplexity_stderr,none": 0.05627099222260382, "acc,none": 0.7583931690277508, "acc_stderr,none": 0.0059636738430670555 }, "lambada_openai_cloze_yaml": { "alias": "lambada_openai_cloze_yaml", "perplexity,none": 29.84274462902617, "perplexity_stderr,none": 0.7625202924355495, "acc,none": 0.3122452940034931, "acc_stderr,none": 0.0064561975253284295 }, "lambada_standard": { "alias": "lambada_standard", "perplexity,none": 3.640092189793144, "perplexity_stderr,none": 0.07656519320424794, "acc,none": 0.688530952843004, "acc_stderr,none": 0.006451805320261074 }, "lambada_standard_cloze_yaml": { "alias": "lambada_standard_cloze_yaml", "perplexity,none": 44.843956670053444, "perplexity_stderr,none": 1.1468558717049042, "acc,none": 0.22588783233068116, "acc_stderr,none": 0.005825865294666935 }, "commonsense_qa": { "alias": "commonsense_qa", "acc,none": 0.5773955773955773, "acc_stderr,none": 0.014142423233580207 }, "mmlu": { "acc,none": 0.6324597635664435, "acc_stderr,none": 0.0037954721741336904, "alias": "mmlu" }, "openbookqa": { "alias": "openbookqa", "acc,none": 0.368, "acc_stderr,none": 0.021588982568353548, "acc_norm,none": 0.47, "acc_norm_stderr,none": 0.0223427481925028 }, "winogrande": { "alias": "winogrande", "acc,none": 0.7671665351223362, "acc_stderr,none": 0.011878201073856598 }, "triviaqa": { "alias": "triviaqa", "exact_match,remove_whitespace": 0.595296477931342, "exact_match_stderr,remove_whitespace": 0.003664271290957409 }, "truthfulqa_mc1": { "alias": "truthfulqa_mc1", "acc,none": 0.37821297429620565, "acc_stderr,none": 0.016976335907546772 }, "truthfulqa_mc2": { "alias": "truthfulqa_mc2", "acc,none": 0.5284457708979753, "acc_stderr,none": 0.015035693028473887 } }, "Mistral-Nemo-Instruct-2407-NVFP4-4over6": { "coqa": { "alias": "coqa", "em,none": 0.5498333333333334, "em_stderr,none": 0.020187689512123422, "f1,none": 0.7211863549140475, "f1_stderr,none": 0.01540089217977273 }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.619398526190002, "acc_stderr,none": 0.004845424524763779, "acc_norm,none": 0.8131846245767775, "acc_norm_stderr,none": 0.003889666837869389 }, "ifeval": { "alias": "ifeval", "prompt_level_strict_acc,none": 0.4121996303142329, "prompt_level_strict_acc_stderr,none": 0.021182238151733295, "inst_level_strict_acc,none": 0.5095923261390888, "inst_level_strict_acc_stderr,none": "N/A", "prompt_level_loose_acc,none": 0.4824399260628466, "prompt_level_loose_acc_stderr,none": 0.02150330051338897, "inst_level_loose_acc,none": 0.5683453237410072, "inst_level_loose_acc_stderr,none": "N/A" }, "lambada_openai": { "alias": "lambada_openai", "perplexity,none": 2.954572513479153, "perplexity_stderr,none": 0.054124688702298446, "acc,none": 0.7686784397438385, "acc_stderr,none": 0.0058747917899013785 }, "lambada_openai_cloze_yaml": { "alias": "lambada_openai_cloze_yaml", "perplexity,none": 30.035509240670926, "perplexity_stderr,none": 0.7780223123588234, "acc,none": 0.29827285076654375, "acc_stderr,none": 0.006373868144287074 }, "lambada_standard": { "alias": "lambada_standard", "perplexity,none": 3.660039154584711, "perplexity_stderr,none": 0.07563902292271452, "acc,none": 0.6906656316708714, "acc_stderr,none": 0.006439617662597691 }, "lambada_standard_cloze_yaml": { "alias": "lambada_standard_cloze_yaml", "perplexity,none": 40.99251979932923, "perplexity_stderr,none": 1.0271212425445286, "acc,none": 0.24665243547448087, "acc_stderr,none": 0.006005545631215194 }, "commonsense_qa": { "alias": "commonsense_qa", "acc,none": 0.5921375921375921, "acc_stderr,none": 0.014069810259917194 }, "mmlu": { "acc,none": 0.6364477994587665, "acc_stderr,none": 0.003806377839571922, "alias": "mmlu" }, "openbookqa": { "alias": "openbookqa", "acc,none": 0.392, "acc_stderr,none": 0.02185468495561119, "acc_norm,none": 0.472, "acc_norm_stderr,none": 0.022347949832668024 }, "winogrande": { "alias": "winogrande", "acc,none": 0.755327545382794, "acc_stderr,none": 0.012082125654159727 }, "triviaqa": { "alias": "triviaqa", "exact_match,remove_whitespace": 0.6011480160499332, "exact_match_stderr,remove_whitespace": 0.003655519111850352 }, "truthfulqa_mc1": { "alias": "truthfulqa_mc1", "acc,none": 0.38310893512851896, "acc_stderr,none": 0.017018461679389734 }, "truthfulqa_mc2": { "alias": "truthfulqa_mc2", "acc,none": 0.5367421440427503, "acc_stderr,none": 0.01489354828588867 } }, "Mistral-Nemo-Instruct-2407-NVFP4-FP8-RTN": { "coqa": { "alias": "coqa", "em,none": 0.5683333333333334, "em_stderr,none": 0.019596946262820592, "f1,none": 0.7401341567024432, "f1_stderr,none": 0.014222135053403443 }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.6237801234813782, "acc_stderr,none": 0.004834461997944986, "acc_norm,none": 0.813981278629755, "acc_norm_stderr,none": 0.003883265210791469 }, "ifeval": { "alias": "ifeval", "prompt_level_strict_acc,none": 0.39926062846580407, "prompt_level_strict_acc_stderr,none": 0.021075331332701258, "inst_level_strict_acc,none": 0.5011990407673861, "inst_level_strict_acc_stderr,none": "N/A", "prompt_level_loose_acc,none": 0.46210720887245843, "prompt_level_loose_acc_stderr,none": 0.021454695436204742, "inst_level_loose_acc,none": 0.5563549160671463, "inst_level_loose_acc_stderr,none": "N/A" }, "lambada_openai": { "alias": "lambada_openai", "perplexity,none": 2.959118986596569, "perplexity_stderr,none": 0.05562968630849542, "acc,none": 0.7618862798369882, "acc_stderr,none": 0.005934024831865026 }, "lambada_openai_cloze_yaml": { "alias": "lambada_openai_cloze_yaml", "perplexity,none": 26.696978874621955, "perplexity_stderr,none": 0.683775381967173, "acc,none": 0.33145740345429847, "acc_stderr,none": 0.006558287884402134 }, "lambada_standard": { "alias": "lambada_standard", "perplexity,none": 3.492972951792885, "perplexity_stderr,none": 0.0721179626815854, "acc,none": 0.6970696681544731, "acc_stderr,none": 0.006402086620816973 }, "lambada_standard_cloze_yaml": { "alias": "lambada_standard_cloze_yaml", "perplexity,none": 37.411029419339414, "perplexity_stderr,none": 0.9371152123007664, "acc,none": 0.2582961381719387, "acc_stderr,none": 0.0060979842659205745 }, "commonsense_qa": { "alias": "commonsense_qa", "acc,none": 0.6060606060606061, "acc_stderr,none": 0.013989198052984327 }, "mmlu": { "acc,none": 0.6434268622703319, "acc_stderr,none": 0.0037824170513249015, "alias": "mmlu" }, "openbookqa": { "alias": "openbookqa", "acc,none": 0.404, "acc_stderr,none": 0.021966635293832883, "acc_norm,none": 0.478, "acc_norm_stderr,none": 0.02236139673920787 }, "winogrande": { "alias": "winogrande", "acc,none": 0.7513812154696132, "acc_stderr,none": 0.012147314713403173 }, "triviaqa": { "alias": "triviaqa", "exact_match,remove_whitespace": 0.6104547481052163, "exact_match_stderr,remove_whitespace": 0.0036404759558834486 }, "truthfulqa_mc1": { "alias": "truthfulqa_mc1", "acc,none": 0.38922888616891066, "acc_stderr,none": 0.01706855268069044 }, "truthfulqa_mc2": { "alias": "truthfulqa_mc2", "acc,none": 0.5389966378633866, "acc_stderr,none": 0.015062753562771725 } }, "Mistral-Nemo-Instruct-2407-NVFP4-FP8": { "coqa": { "alias": "coqa", "em,none": 0.5733333333333334, "em_stderr,none": 0.019505122108457063, "f1,none": 0.7346963191068078, "f1_stderr,none": 0.015019925951016882 }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.6239792869946226, "acc_stderr,none": 0.004833953712521647, "acc_norm,none": 0.8124875522804222, "acc_norm_stderr,none": 0.0038952463204528333 }, "ifeval": { "alias": "ifeval", "prompt_level_strict_acc,none": 0.4195933456561922, "prompt_level_strict_acc_stderr,none": 0.021236532548855144, "inst_level_strict_acc,none": 0.5107913669064749, "inst_level_strict_acc_stderr,none": "N/A", "prompt_level_loose_acc,none": 0.49168207024029575, "prompt_level_loose_acc_stderr,none": 0.02151359656402127, "inst_level_loose_acc,none": 0.5767386091127098, "inst_level_loose_acc_stderr,none": "N/A" }, "lambada_openai": { "alias": "lambada_openai", "perplexity,none": 2.923326330881571, "perplexity_stderr,none": 0.05419037402311029, "acc,none": 0.7725596739763245, "acc_stderr,none": 0.005839986255519642 }, "lambada_openai_cloze_yaml": { "alias": "lambada_openai_cloze_yaml", "perplexity,none": 26.694758918393248, "perplexity_stderr,none": 0.6858369120887257, "acc,none": 0.33165146516592275, "acc_stderr,none": 0.0065592552732574244 }, "lambada_standard": { "alias": "lambada_standard", "perplexity,none": 3.551356285449324, "perplexity_stderr,none": 0.07342709527808654, "acc,none": 0.6926062487871143, "acc_stderr,none": 0.006428398527904964 }, "lambada_standard_cloze_yaml": { "alias": "lambada_standard_cloze_yaml", "perplexity,none": 35.56146784383732, "perplexity_stderr,none": 0.8740618781960072, "acc,none": 0.2837182223947215, "acc_stderr,none": 0.0062805494483705775 }, "commonsense_qa": { "alias": "commonsense_qa", "acc,none": 0.6208026208026208, "acc_stderr,none": 0.013890872306969766 }, "mmlu": { "acc,none": 0.6454208802164934, "acc_stderr,none": 0.0037774970297410814, "alias": "mmlu" }, "openbookqa": { "alias": "openbookqa", "acc,none": 0.404, "acc_stderr,none": 0.021966635293832883, "acc_norm,none": 0.488, "acc_norm_stderr,none": 0.022376626797927058 }, "winogrande": { "alias": "winogrande", "acc,none": 0.7545382794001578, "acc_stderr,none": 0.01209527293718361 }, "triviaqa": { "alias": "triviaqa", "exact_match,remove_whitespace": 0.6184239857333927, "exact_match_stderr,remove_whitespace": 0.003626487357735664 }, "truthfulqa_mc1": { "alias": "truthfulqa_mc1", "acc,none": 0.39167686658506734, "acc_stderr,none": 0.017087795881769625 }, "truthfulqa_mc2": { "alias": "truthfulqa_mc2", "acc,none": 0.5475484445657804, "acc_stderr,none": 0.015041357055984873 } } }