| { |
| "Mistral-Nemo-Instruct-2407-NVFP4": { |
| "coqa": { |
| "alias": "coqa", |
| "em,none": 0.5391666666666667, |
| "em_stderr,none": 0.019942406218865946, |
| "f1,none": 0.7182411035342305, |
| "f1_stderr,none": 0.015116252267763605 |
| }, |
| "hellaswag": { |
| "alias": "hellaswag", |
| "acc,none": 0.6186018721370244, |
| "acc_stderr,none": 0.004847372670134442, |
| "acc_norm,none": 0.8084047002589125, |
| "acc_norm_stderr,none": 0.003927519561403822 |
| }, |
| "ifeval": { |
| "alias": "ifeval", |
| "prompt_level_strict_acc,none": 0.3807763401109057, |
| "prompt_level_strict_acc_stderr,none": 0.020895937888190826, |
| "inst_level_strict_acc,none": 0.4712230215827338, |
| "inst_level_strict_acc_stderr,none": "N/A", |
| "prompt_level_loose_acc,none": 0.4602587800369686, |
| "prompt_level_loose_acc_stderr,none": 0.021448501434135032, |
| "inst_level_loose_acc,none": 0.5455635491606715, |
| "inst_level_loose_acc_stderr,none": "N/A" |
| }, |
| "lambada_openai": { |
| "alias": "lambada_openai", |
| "perplexity,none": 3.0228658176330048, |
| "perplexity_stderr,none": 0.05627099222260382, |
| "acc,none": 0.7583931690277508, |
| "acc_stderr,none": 0.0059636738430670555 |
| }, |
| "lambada_openai_cloze_yaml": { |
| "alias": "lambada_openai_cloze_yaml", |
| "perplexity,none": 29.84274462902617, |
| "perplexity_stderr,none": 0.7625202924355495, |
| "acc,none": 0.3122452940034931, |
| "acc_stderr,none": 0.0064561975253284295 |
| }, |
| "lambada_standard": { |
| "alias": "lambada_standard", |
| "perplexity,none": 3.640092189793144, |
| "perplexity_stderr,none": 0.07656519320424794, |
| "acc,none": 0.688530952843004, |
| "acc_stderr,none": 0.006451805320261074 |
| }, |
| "lambada_standard_cloze_yaml": { |
| "alias": "lambada_standard_cloze_yaml", |
| "perplexity,none": 44.843956670053444, |
| "perplexity_stderr,none": 1.1468558717049042, |
| "acc,none": 0.22588783233068116, |
| "acc_stderr,none": 0.005825865294666935 |
| }, |
| "commonsense_qa": { |
| "alias": "commonsense_qa", |
| "acc,none": 0.5773955773955773, |
| "acc_stderr,none": 0.014142423233580207 |
| }, |
| "mmlu": { |
| "acc,none": 0.6324597635664435, |
| "acc_stderr,none": 0.0037954721741336904, |
| "alias": "mmlu" |
| }, |
| "openbookqa": { |
| "alias": "openbookqa", |
| "acc,none": 0.368, |
| "acc_stderr,none": 0.021588982568353548, |
| "acc_norm,none": 0.47, |
| "acc_norm_stderr,none": 0.0223427481925028 |
| }, |
| "winogrande": { |
| "alias": "winogrande", |
| "acc,none": 0.7671665351223362, |
| "acc_stderr,none": 0.011878201073856598 |
| }, |
| "triviaqa": { |
| "alias": "triviaqa", |
| "exact_match,remove_whitespace": 0.595296477931342, |
| "exact_match_stderr,remove_whitespace": 0.003664271290957409 |
| }, |
| "truthfulqa_mc1": { |
| "alias": "truthfulqa_mc1", |
| "acc,none": 0.37821297429620565, |
| "acc_stderr,none": 0.016976335907546772 |
| }, |
| "truthfulqa_mc2": { |
| "alias": "truthfulqa_mc2", |
| "acc,none": 0.5284457708979753, |
| "acc_stderr,none": 0.015035693028473887 |
| } |
| }, |
| "Mistral-Nemo-Instruct-2407-NVFP4-4over6": { |
| "coqa": { |
| "alias": "coqa", |
| "em,none": 0.5498333333333334, |
| "em_stderr,none": 0.020187689512123422, |
| "f1,none": 0.7211863549140475, |
| "f1_stderr,none": 0.01540089217977273 |
| }, |
| "hellaswag": { |
| "alias": "hellaswag", |
| "acc,none": 0.619398526190002, |
| "acc_stderr,none": 0.004845424524763779, |
| "acc_norm,none": 0.8131846245767775, |
| "acc_norm_stderr,none": 0.003889666837869389 |
| }, |
| "ifeval": { |
| "alias": "ifeval", |
| "prompt_level_strict_acc,none": 0.4121996303142329, |
| "prompt_level_strict_acc_stderr,none": 0.021182238151733295, |
| "inst_level_strict_acc,none": 0.5095923261390888, |
| "inst_level_strict_acc_stderr,none": "N/A", |
| "prompt_level_loose_acc,none": 0.4824399260628466, |
| "prompt_level_loose_acc_stderr,none": 0.02150330051338897, |
| "inst_level_loose_acc,none": 0.5683453237410072, |
| "inst_level_loose_acc_stderr,none": "N/A" |
| }, |
| "lambada_openai": { |
| "alias": "lambada_openai", |
| "perplexity,none": 2.954572513479153, |
| "perplexity_stderr,none": 0.054124688702298446, |
| "acc,none": 0.7686784397438385, |
| "acc_stderr,none": 0.0058747917899013785 |
| }, |
| "lambada_openai_cloze_yaml": { |
| "alias": "lambada_openai_cloze_yaml", |
| "perplexity,none": 30.035509240670926, |
| "perplexity_stderr,none": 0.7780223123588234, |
| "acc,none": 0.29827285076654375, |
| "acc_stderr,none": 0.006373868144287074 |
| }, |
| "lambada_standard": { |
| "alias": "lambada_standard", |
| "perplexity,none": 3.660039154584711, |
| "perplexity_stderr,none": 0.07563902292271452, |
| "acc,none": 0.6906656316708714, |
| "acc_stderr,none": 0.006439617662597691 |
| }, |
| "lambada_standard_cloze_yaml": { |
| "alias": "lambada_standard_cloze_yaml", |
| "perplexity,none": 40.99251979932923, |
| "perplexity_stderr,none": 1.0271212425445286, |
| "acc,none": 0.24665243547448087, |
| "acc_stderr,none": 0.006005545631215194 |
| }, |
| "commonsense_qa": { |
| "alias": "commonsense_qa", |
| "acc,none": 0.5921375921375921, |
| "acc_stderr,none": 0.014069810259917194 |
| }, |
| "mmlu": { |
| "acc,none": 0.6364477994587665, |
| "acc_stderr,none": 0.003806377839571922, |
| "alias": "mmlu" |
| }, |
| "openbookqa": { |
| "alias": "openbookqa", |
| "acc,none": 0.392, |
| "acc_stderr,none": 0.02185468495561119, |
| "acc_norm,none": 0.472, |
| "acc_norm_stderr,none": 0.022347949832668024 |
| }, |
| "winogrande": { |
| "alias": "winogrande", |
| "acc,none": 0.755327545382794, |
| "acc_stderr,none": 0.012082125654159727 |
| }, |
| "triviaqa": { |
| "alias": "triviaqa", |
| "exact_match,remove_whitespace": 0.6011480160499332, |
| "exact_match_stderr,remove_whitespace": 0.003655519111850352 |
| }, |
| "truthfulqa_mc1": { |
| "alias": "truthfulqa_mc1", |
| "acc,none": 0.38310893512851896, |
| "acc_stderr,none": 0.017018461679389734 |
| }, |
| "truthfulqa_mc2": { |
| "alias": "truthfulqa_mc2", |
| "acc,none": 0.5367421440427503, |
| "acc_stderr,none": 0.01489354828588867 |
| } |
| }, |
| "Mistral-Nemo-Instruct-2407-NVFP4-FP8-RTN": { |
| "coqa": { |
| "alias": "coqa", |
| "em,none": 0.5683333333333334, |
| "em_stderr,none": 0.019596946262820592, |
| "f1,none": 0.7401341567024432, |
| "f1_stderr,none": 0.014222135053403443 |
| }, |
| "hellaswag": { |
| "alias": "hellaswag", |
| "acc,none": 0.6237801234813782, |
| "acc_stderr,none": 0.004834461997944986, |
| "acc_norm,none": 0.813981278629755, |
| "acc_norm_stderr,none": 0.003883265210791469 |
| }, |
| "ifeval": { |
| "alias": "ifeval", |
| "prompt_level_strict_acc,none": 0.39926062846580407, |
| "prompt_level_strict_acc_stderr,none": 0.021075331332701258, |
| "inst_level_strict_acc,none": 0.5011990407673861, |
| "inst_level_strict_acc_stderr,none": "N/A", |
| "prompt_level_loose_acc,none": 0.46210720887245843, |
| "prompt_level_loose_acc_stderr,none": 0.021454695436204742, |
| "inst_level_loose_acc,none": 0.5563549160671463, |
| "inst_level_loose_acc_stderr,none": "N/A" |
| }, |
| "lambada_openai": { |
| "alias": "lambada_openai", |
| "perplexity,none": 2.959118986596569, |
| "perplexity_stderr,none": 0.05562968630849542, |
| "acc,none": 0.7618862798369882, |
| "acc_stderr,none": 0.005934024831865026 |
| }, |
| "lambada_openai_cloze_yaml": { |
| "alias": "lambada_openai_cloze_yaml", |
| "perplexity,none": 26.696978874621955, |
| "perplexity_stderr,none": 0.683775381967173, |
| "acc,none": 0.33145740345429847, |
| "acc_stderr,none": 0.006558287884402134 |
| }, |
| "lambada_standard": { |
| "alias": "lambada_standard", |
| "perplexity,none": 3.492972951792885, |
| "perplexity_stderr,none": 0.0721179626815854, |
| "acc,none": 0.6970696681544731, |
| "acc_stderr,none": 0.006402086620816973 |
| }, |
| "lambada_standard_cloze_yaml": { |
| "alias": "lambada_standard_cloze_yaml", |
| "perplexity,none": 37.411029419339414, |
| "perplexity_stderr,none": 0.9371152123007664, |
| "acc,none": 0.2582961381719387, |
| "acc_stderr,none": 0.0060979842659205745 |
| }, |
| "commonsense_qa": { |
| "alias": "commonsense_qa", |
| "acc,none": 0.6060606060606061, |
| "acc_stderr,none": 0.013989198052984327 |
| }, |
| "mmlu": { |
| "acc,none": 0.6434268622703319, |
| "acc_stderr,none": 0.0037824170513249015, |
| "alias": "mmlu" |
| }, |
| "openbookqa": { |
| "alias": "openbookqa", |
| "acc,none": 0.404, |
| "acc_stderr,none": 0.021966635293832883, |
| "acc_norm,none": 0.478, |
| "acc_norm_stderr,none": 0.02236139673920787 |
| }, |
| "winogrande": { |
| "alias": "winogrande", |
| "acc,none": 0.7513812154696132, |
| "acc_stderr,none": 0.012147314713403173 |
| }, |
| "triviaqa": { |
| "alias": "triviaqa", |
| "exact_match,remove_whitespace": 0.6104547481052163, |
| "exact_match_stderr,remove_whitespace": 0.0036404759558834486 |
| }, |
| "truthfulqa_mc1": { |
| "alias": "truthfulqa_mc1", |
| "acc,none": 0.38922888616891066, |
| "acc_stderr,none": 0.01706855268069044 |
| }, |
| "truthfulqa_mc2": { |
| "alias": "truthfulqa_mc2", |
| "acc,none": 0.5389966378633866, |
| "acc_stderr,none": 0.015062753562771725 |
| } |
| }, |
| "Mistral-Nemo-Instruct-2407-NVFP4-FP8": { |
| "coqa": { |
| "alias": "coqa", |
| "em,none": 0.5733333333333334, |
| "em_stderr,none": 0.019505122108457063, |
| "f1,none": 0.7346963191068078, |
| "f1_stderr,none": 0.015019925951016882 |
| }, |
| "hellaswag": { |
| "alias": "hellaswag", |
| "acc,none": 0.6239792869946226, |
| "acc_stderr,none": 0.004833953712521647, |
| "acc_norm,none": 0.8124875522804222, |
| "acc_norm_stderr,none": 0.0038952463204528333 |
| }, |
| "ifeval": { |
| "alias": "ifeval", |
| "prompt_level_strict_acc,none": 0.4195933456561922, |
| "prompt_level_strict_acc_stderr,none": 0.021236532548855144, |
| "inst_level_strict_acc,none": 0.5107913669064749, |
| "inst_level_strict_acc_stderr,none": "N/A", |
| "prompt_level_loose_acc,none": 0.49168207024029575, |
| "prompt_level_loose_acc_stderr,none": 0.02151359656402127, |
| "inst_level_loose_acc,none": 0.5767386091127098, |
| "inst_level_loose_acc_stderr,none": "N/A" |
| }, |
| "lambada_openai": { |
| "alias": "lambada_openai", |
| "perplexity,none": 2.923326330881571, |
| "perplexity_stderr,none": 0.05419037402311029, |
| "acc,none": 0.7725596739763245, |
| "acc_stderr,none": 0.005839986255519642 |
| }, |
| "lambada_openai_cloze_yaml": { |
| "alias": "lambada_openai_cloze_yaml", |
| "perplexity,none": 26.694758918393248, |
| "perplexity_stderr,none": 0.6858369120887257, |
| "acc,none": 0.33165146516592275, |
| "acc_stderr,none": 0.0065592552732574244 |
| }, |
| "lambada_standard": { |
| "alias": "lambada_standard", |
| "perplexity,none": 3.551356285449324, |
| "perplexity_stderr,none": 0.07342709527808654, |
| "acc,none": 0.6926062487871143, |
| "acc_stderr,none": 0.006428398527904964 |
| }, |
| "lambada_standard_cloze_yaml": { |
| "alias": "lambada_standard_cloze_yaml", |
| "perplexity,none": 35.56146784383732, |
| "perplexity_stderr,none": 0.8740618781960072, |
| "acc,none": 0.2837182223947215, |
| "acc_stderr,none": 0.0062805494483705775 |
| }, |
| "commonsense_qa": { |
| "alias": "commonsense_qa", |
| "acc,none": 0.6208026208026208, |
| "acc_stderr,none": 0.013890872306969766 |
| }, |
| "mmlu": { |
| "acc,none": 0.6454208802164934, |
| "acc_stderr,none": 0.0037774970297410814, |
| "alias": "mmlu" |
| }, |
| "openbookqa": { |
| "alias": "openbookqa", |
| "acc,none": 0.404, |
| "acc_stderr,none": 0.021966635293832883, |
| "acc_norm,none": 0.488, |
| "acc_norm_stderr,none": 0.022376626797927058 |
| }, |
| "winogrande": { |
| "alias": "winogrande", |
| "acc,none": 0.7545382794001578, |
| "acc_stderr,none": 0.01209527293718361 |
| }, |
| "triviaqa": { |
| "alias": "triviaqa", |
| "exact_match,remove_whitespace": 0.6184239857333927, |
| "exact_match_stderr,remove_whitespace": 0.003626487357735664 |
| }, |
| "truthfulqa_mc1": { |
| "alias": "truthfulqa_mc1", |
| "acc,none": 0.39167686658506734, |
| "acc_stderr,none": 0.017087795881769625 |
| }, |
| "truthfulqa_mc2": { |
| "alias": "truthfulqa_mc2", |
| "acc,none": 0.5475484445657804, |
| "acc_stderr,none": 0.015041357055984873 |
| } |
| } |
| } |