DataSnake's picture
Upload 2 files
d752c92 verified
,,Mistral-Nemo-Instruct-2407-NVFP4,,Mistral-Nemo-Instruct-2407-NVFP4-4over6,,Mistral-Nemo-Instruct-2407-NVFP4-FP8-RTN,,Mistral-Nemo-Instruct-2407-NVFP4-FP8,
Task,Metric,Value,Stderr,Value,Stderr,Value,Stderr,Value,Stderr
coqa,em,0.5392,0.0199,0.5498,0.0202,0.5683,0.0196,0.5733,0.0195
,f1,0.7182,0.0151,0.7212,0.0154,0.7401,0.0142,0.7347,0.0150
hellaswag,acc,0.6186,0.0048,0.6194,0.0048,0.6238,0.0048,0.6240,0.0048
,acc_norm,0.8084,0.0039,0.8132,0.0039,0.8140,0.0039,0.8125,0.0039
ifeval,inst_level_loose_acc,0.5456,N/A,0.5683,N/A,0.5564,N/A,0.5767,N/A
,inst_level_strict_acc,0.4712,N/A,0.5096,N/A,0.5012,N/A,0.5108,N/A
,prompt_level_loose_acc,0.4603,0.0214,0.4824,0.0215,0.4621,0.0215,0.4917,0.0215
,prompt_level_strict_acc,0.3808,0.0209,0.4122,0.0212,0.3993,0.0211,0.4196,0.0212
lambada_openai,acc,0.7584,0.0060,0.7687,0.0059,0.7619,0.0059,0.7726,0.0058
,perplexity,3.0229,0.0563,2.9546,0.0541,2.9591,0.0556,2.9233,0.0542
lambada_openai_cloze,acc,0.3122,0.0065,0.2983,0.0064,0.3315,0.0066,0.3317,0.0066
,perplexity,29.8427,0.7625,30.0355,0.7780,26.6970,0.6838,26.6948,0.6858
lambada_standard,acc,0.6885,0.0065,0.6907,0.0064,0.6971,0.0064,0.6926,0.0064
,perplexity,3.6401,0.0766,3.6600,0.0756,3.4930,0.0721,3.5514,0.0734
lambada_standard_cloze,acc,0.2259,0.0058,0.2467,0.0060,0.2583,0.0061,0.2837,0.0063
,perplexity,44.8440,1.1469,40.9925,1.0271,37.4110,0.9371,35.5615,0.8741
commonsense_qa,acc,0.5774,0.0141,0.5921,0.0141,0.6061,0.0140,0.6208,0.0139
mmlu,acc,0.6325,0.0038,0.6364,0.0038,0.6434,0.0038,0.6454,0.0038
,acc,0.5673,0.0067,0.5779,0.0067,0.5819,0.0067,0.5864,0.0067
,acc,0.7123,0.0078,0.7110,0.0078,0.7210,0.0078,0.7277,0.0077
,acc,0.7491,0.0076,0.7504,0.0076,0.7563,0.0076,0.7563,0.0076
,acc,0.5373,0.0085,0.5392,0.0085,0.5487,0.0084,0.5442,0.0085
openbookqa,acc,0.3680,0.0216,0.3920,0.0219,0.4040,0.0220,0.4040,0.0220
,acc_norm,0.4700,0.0223,0.4720,0.0223,0.4780,0.0224,0.4880,0.0224
winogrande,acc,0.7672,0.0119,0.7553,0.0121,0.7514,0.0121,0.7545,0.0121
triviaqa,exact_match,0.5953,0.0037,0.6011,0.0037,0.6105,0.0036,0.6184,0.0036
truthfulqa_mc1,acc,0.3782,0.0170,0.3831,0.0170,0.3892,0.0171,0.3917,0.0171
truthfulqa_mc2,acc,0.5284,0.0150,0.5367,0.0149,0.5390,0.0151,0.5475,0.0150