Upload nemotron_fineinstructions_1T_judged_exp_chat_300M/metrics.eval.jsonl with huggingface_hub
Browse files
nemotron_fineinstructions_1T_judged_exp_chat_300M/metrics.eval.jsonl
CHANGED
|
@@ -1,2 +1,5 @@
|
|
| 1 |
-
{"created_at": "
|
| 2 |
-
{"created_at": "
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"created_at": "2026-01-24T06:00:21.593213", "global_step": 30000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.18673218673218672, "acc_stderr,none": 0.011156975219176343}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.38627763393746267, "acc_stderr,none": 0.0048590041846946095, "acc_norm,none": 0.48685520812587135, "acc_norm_stderr,none": 0.004988056789119668}, "mmlu": {"acc,none": 0.25181598062953997, "acc_stderr,none": 0.0036611273506700058, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.251009564293305, "acc_stderr,none": 0.006317842442546608, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.03670066451047181}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.2, "acc_stderr,none": 0.03123475237772118}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.25, "acc_stderr,none": 0.03039153369274154}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.23628691983122363, "acc_stderr,none": 0.027652153144159274}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.256198347107438, "acc_stderr,none": 0.03984979653302871}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.21296296296296297, "acc_stderr,none": 0.039578354719809805}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.294478527607362, "acc_stderr,none": 0.03581165790474082}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.2138728323699422, "acc_stderr,none": 0.022075709251757173}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.27932960893854747, "acc_stderr,none": 0.015005762446786166}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.3054662379421222, "acc_stderr,none": 0.026160584450140478}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.23765432098765432, "acc_stderr,none": 0.023683591837008557}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.24511082138200782, "acc_stderr,none": 0.010986307870045528}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.23391812865497075, "acc_stderr,none": 0.03246721765117825}, "mmlu_other": {"acc,none": 0.2574831026713872, "acc_stderr,none": 0.007845845216601652, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.21132075471698114, "acc_stderr,none": 0.025125766484827845}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.26011560693641617, "acc_stderr,none": 0.033450369167889925}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.2600896860986547, "acc_stderr,none": 0.029442495585857476}, "mmlu_management": {"alias": " - management", "acc,none": 0.32038834951456313, "acc_stderr,none": 0.046202840822800406}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.23931623931623933, "acc_stderr,none": 0.027951826808924333}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.01567100600933958}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.2581699346405229, "acc_stderr,none": 0.02505850331695816}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.2553191489361702, "acc_stderr,none": 0.02601199293090201}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.22426470588235295, "acc_stderr,none": 0.025336848563332386}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "mmlu_social_sciences": {"acc,none": 0.2365940851478713, "acc_stderr,none": 0.007654088178391041, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2982456140350877, "acc_stderr,none": 0.04303684033537315}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.2474747474747475, "acc_stderr,none": 0.0307463007421245}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.23834196891191708, "acc_stderr,none": 0.030748905363909895}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.20256410256410257, "acc_stderr,none": 0.02037766097037138}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.226890756302521, "acc_stderr,none": 0.027205371538279472}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.21100917431192662, "acc_stderr,none": 0.017493922404112648}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.3053435114503817, "acc_stderr,none": 0.040393149787245605}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.2581699346405229, "acc_stderr,none": 0.01770453165325008}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2909090909090909, "acc_stderr,none": 0.04350271442923243}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.20408163265306123, "acc_stderr,none": 0.025801283475090506}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.263681592039801, "acc_stderr,none": 0.03115715086935556}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.18, "acc_stderr,none": 0.038612291966536955}, "mmlu_stem": {"acc,none": 0.2622898826514431, "acc_stderr,none": 0.00783619334715781, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.2, "acc_stderr,none": 0.04020151261036845}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.3037037037037037, "acc_stderr,none": 0.039725528847851375}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.2565789473684211, "acc_stderr,none": 0.0355418036802569}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.25, "acc_stderr,none": 0.03621034121889507}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.21, "acc_stderr,none": 0.040936018074033256}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.18, "acc_stderr,none": 0.03861229196653695}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.2549019607843137, "acc_stderr,none": 0.04336432707993178}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.18, "acc_stderr,none": 0.038612291966536934}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.2680851063829787, "acc_stderr,none": 0.028957342788342347}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2827586206896552, "acc_stderr,none": 0.037528339580033376}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.2671957671957672, "acc_stderr,none": 0.02278967314577657}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.2870967741935484, "acc_stderr,none": 0.02573654274559453}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.24630541871921183, "acc_stderr,none": 0.030315099285617732}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2518518518518518, "acc_stderr,none": 0.02646611753895991}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.2781456953642384, "acc_stderr,none": 0.03658603262763743}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.27314814814814814, "acc_stderr,none": 0.03038805130167812}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.32142857142857145, "acc_stderr,none": 0.04432804055291519}, "sciq": {"alias": "sciq", "acc,none": 0.851, "acc_stderr,none": 0.011266140684632168, "acc_norm,none": 0.783, "acc_norm_stderr,none": 0.01304151375727071}}
|
| 2 |
+
{"created_at": "2026-01-24T10:11:58.405622", "global_step": 60000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.2022932022932023, "acc_stderr,none": 0.01150091452526044}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.40509858593905596, "acc_stderr,none": 0.004899078300184263, "acc_norm,none": 0.5189205337582155, "acc_norm_stderr,none": 0.004986207581862942}, "mmlu": {"acc,none": 0.23757299529981485, "acc_stderr,none": 0.00358543052703827, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.23464399574920297, "acc_stderr,none": 0.00617777193998706, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.035122074123020534}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.18181818181818182, "acc_stderr,none": 0.030117688929503585}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.19607843137254902, "acc_stderr,none": 0.027865942286639318}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.25316455696202533, "acc_stderr,none": 0.028304657943035303}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.3140495867768595, "acc_stderr,none": 0.04236964753041018}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.043300437496507437}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.25153374233128833, "acc_stderr,none": 0.03408997886857529}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.22254335260115607, "acc_stderr,none": 0.022394215661942815}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.2424581005586592, "acc_stderr,none": 0.014333522059217892}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.2090032154340836, "acc_stderr,none": 0.023093140398374224}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.23765432098765432, "acc_stderr,none": 0.023683591837008564}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.2379400260756193, "acc_stderr,none": 0.010875700787694228}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.23391812865497075, "acc_stderr,none": 0.032467217651178264}, "mmlu_other": {"acc,none": 0.2565175410363695, "acc_stderr,none": 0.007817093840654172, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.2188679245283019, "acc_stderr,none": 0.02544786382510861}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2023121387283237, "acc_stderr,none": 0.030631145539198823}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.33, "acc_stderr,none": 0.04725815626252605}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.33183856502242154, "acc_stderr,none": 0.031602951437766785}, "mmlu_management": {"alias": " - management", "acc,none": 0.23300970873786409, "acc_stderr,none": 0.04185832598928315}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2692307692307692, "acc_stderr,none": 0.029058588303748845}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.23, "acc_stderr,none": 0.04229525846816506}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2707535121328225, "acc_stderr,none": 0.015889888362560486}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.023805186524888135}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.2553191489361702, "acc_stderr,none": 0.026011992930902006}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.1948529411764706, "acc_stderr,none": 0.024060599423487424}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "mmlu_social_sciences": {"acc,none": 0.22424439389015274, "acc_stderr,none": 0.007507654980375001, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.24561403508771928, "acc_stderr,none": 0.04049339297748142}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.20202020202020202, "acc_stderr,none": 0.02860620428922989}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.21761658031088082, "acc_stderr,none": 0.02977866303775296}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.2153846153846154, "acc_stderr,none": 0.020843034557462874}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.23529411764705882, "acc_stderr,none": 0.027553614467863818}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.2018348623853211, "acc_stderr,none": 0.017208579357787575}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.22137404580152673, "acc_stderr,none": 0.036412970813137296}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.24836601307189543, "acc_stderr,none": 0.017479487001364764}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.35454545454545455, "acc_stderr,none": 0.04582004841505416}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.16326530612244897, "acc_stderr,none": 0.023661699177098615}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.029475250236017173}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_stem": {"acc,none": 0.23628290516967967, "acc_stderr,none": 0.0075479730933306535, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.24, "acc_stderr,none": 0.04292346959909283}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.03712537833614866}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.21052631578947367, "acc_stderr,none": 0.03317672787533157}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2152777777777778, "acc_stderr,none": 0.03437079344106135}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.18, "acc_stderr,none": 0.038612291966536955}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.2, "acc_stderr,none": 0.04020151261036846}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.22549019607843138, "acc_stderr,none": 0.041583075330832865}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.24, "acc_stderr,none": 0.04292346959909282}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.23404255319148937, "acc_stderr,none": 0.02767845257821239}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.23448275862068965, "acc_stderr,none": 0.035306258743465914}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.2328042328042328, "acc_stderr,none": 0.02176596167215454}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.24838709677419354, "acc_stderr,none": 0.024580028921481003}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.22167487684729065, "acc_stderr,none": 0.029225575892489614}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.31851851851851853, "acc_stderr,none": 0.02840653309060846}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.2119205298013245, "acc_stderr,none": 0.033367670865679766}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.14351851851851852, "acc_stderr,none": 0.02391077925264438}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.3392857142857143, "acc_stderr,none": 0.04493949068613541}, "sciq": {"alias": "sciq", "acc,none": 0.864, "acc_stderr,none": 0.010845350230472988, "acc_norm,none": 0.809, "acc_norm_stderr,none": 0.012436787112179463}}
|
| 3 |
+
{"created_at": "2026-01-24T14:30:00.467018", "global_step": 90000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.2031122031122031, "acc_stderr,none": 0.011518254793634082}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41784505078669587, "acc_stderr,none": 0.004921964133874016, "acc_norm,none": 0.5417247560246963, "acc_norm_stderr,none": 0.0049723770859163305}, "mmlu": {"acc,none": 0.2441247685514884, "acc_stderr,none": 0.003620811025312585, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.23995749202975558, "acc_stderr,none": 0.006227280158369524, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.03670066451047182}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.22424242424242424, "acc_stderr,none": 0.032568666616811015}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.24019607843137256, "acc_stderr,none": 0.02998373305591362}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.25738396624472576, "acc_stderr,none": 0.028458820991460302}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2892561983471074, "acc_stderr,none": 0.041391127276354626}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.32407407407407407, "acc_stderr,none": 0.0452459600703005}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.27607361963190186, "acc_stderr,none": 0.0351238528370505}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.22254335260115607, "acc_stderr,none": 0.02239421566194282}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.23128491620111732, "acc_stderr,none": 0.014102223623152594}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.21864951768488747, "acc_stderr,none": 0.02347558141786111}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.2623456790123457, "acc_stderr,none": 0.02447722285613512}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.23468057366362452, "acc_stderr,none": 0.010824026872449348}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.25146198830409355, "acc_stderr,none": 0.033275044238468436}, "mmlu_other": {"acc,none": 0.26295461860315417, "acc_stderr,none": 0.007885078265731137, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.24, "acc_stderr,none": 0.042923469599092816}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.27169811320754716, "acc_stderr,none": 0.027377706624670713}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2023121387283237, "acc_stderr,none": 0.030631145539198823}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.3, "acc_stderr,none": 0.04605661864718381}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.3452914798206278, "acc_stderr,none": 0.03191100192835794}, "mmlu_management": {"alias": " - management", "acc,none": 0.2621359223300971, "acc_stderr,none": 0.04354631077260595}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2692307692307692, "acc_stderr,none": 0.029058588303748842}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.24, "acc_stderr,none": 0.042923469599092816}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2707535121328225, "acc_stderr,none": 0.015889888362560486}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.24509803921568626, "acc_stderr,none": 0.02463004897982477}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.24113475177304963, "acc_stderr,none": 0.025518731049537776}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.20220588235294118, "acc_stderr,none": 0.024398192986654924}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.036643147772880844}, "mmlu_social_sciences": {"acc,none": 0.2349691257718557, "acc_stderr,none": 0.007632955947226125, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2543859649122807, "acc_stderr,none": 0.040969851398436695}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.21717171717171718, "acc_stderr,none": 0.029376616484945644}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.22279792746113988, "acc_stderr,none": 0.030031147977641545}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.20256410256410257, "acc_stderr,none": 0.020377660970371386}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.22268907563025211, "acc_stderr,none": 0.027025433498882395}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.23669724770642203, "acc_stderr,none": 0.01822407811729909}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.2366412213740458, "acc_stderr,none": 0.03727673575596919}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.2549019607843137, "acc_stderr,none": 0.017630827375148383}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.36363636363636365, "acc_stderr,none": 0.04607582090719976}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.17551020408163265, "acc_stderr,none": 0.024352800722970015}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.25870646766169153, "acc_stderr,none": 0.030965903123573033}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_stem": {"acc,none": 0.2407231208372978, "acc_stderr,none": 0.0076037876769203165, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542127}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.03785714465066653}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.17763157894736842, "acc_stderr,none": 0.031103182383123377}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2361111111111111, "acc_stderr,none": 0.03551446610810826}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.22, "acc_stderr,none": 0.041633319989322695}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.2, "acc_stderr,none": 0.040201512610368445}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.26, "acc_stderr,none": 0.04408440022768078}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.20588235294117646, "acc_stderr,none": 0.04023382273617746}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.29, "acc_stderr,none": 0.04560480215720684}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.2978723404255319, "acc_stderr,none": 0.02989614568209546}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2, "acc_stderr,none": 0.033333333333333284}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.24603174603174602, "acc_stderr,none": 0.022182037202948368}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.02468597928623996}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.26108374384236455, "acc_stderr,none": 0.03090379695211447}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.24, "acc_stderr,none": 0.042923469599092816}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.26296296296296295, "acc_stderr,none": 0.026842057873833706}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.19205298013245034, "acc_stderr,none": 0.032162984205936156}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.16203703703703703, "acc_stderr,none": 0.025130453652268455}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.3125, "acc_stderr,none": 0.043994650575715215}, "sciq": {"alias": "sciq", "acc,none": 0.861, "acc_stderr,none": 0.010945263761042975, "acc_norm,none": 0.804, "acc_norm_stderr,none": 0.012559527926707361}}
|
| 4 |
+
{"created_at": "2026-01-24T18:42:02.296251", "global_step": 120000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.19492219492219492, "acc_stderr,none": 0.011341478090883525}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42889862577175863, "acc_stderr,none": 0.004939073014754939, "acc_norm,none": 0.5571599283011353, "acc_norm_stderr,none": 0.0049570683775165105}, "mmlu": {"acc,none": 0.24120495655889473, "acc_stderr,none": 0.0036061821820936144, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.23995749202975558, "acc_stderr,none": 0.006224982953131334, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.25396825396825395, "acc_stderr,none": 0.038932596106046755}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.21212121212121213, "acc_stderr,none": 0.031922715695483}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.25, "acc_stderr,none": 0.03039153369274154}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.29957805907172996, "acc_stderr,none": 0.029818024749753102}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2396694214876033, "acc_stderr,none": 0.03896878985070416}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.043300437496507437}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.2331288343558282, "acc_stderr,none": 0.03322015795776741}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.24566473988439305, "acc_stderr,none": 0.02317629820399201}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.2435754189944134, "acc_stderr,none": 0.014355911964767865}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.18971061093247588, "acc_stderr,none": 0.022268196258783225}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.2191358024691358, "acc_stderr,none": 0.023016705640262206}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.23468057366362452, "acc_stderr,none": 0.010824026872449346}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.29239766081871343, "acc_stderr,none": 0.03488647713457921}, "mmlu_other": {"acc,none": 0.261345349211458, "acc_stderr,none": 0.007864821369688076, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.2, "acc_stderr,none": 0.040201512610368445}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.2037735849056604, "acc_stderr,none": 0.024790784501775406}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2138728323699422, "acc_stderr,none": 0.03126511206173044}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.3721973094170404, "acc_stderr,none": 0.032443052830087304}, "mmlu_management": {"alias": " - management", "acc,none": 0.27184466019417475, "acc_stderr,none": 0.044052680241409216}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2863247863247863, "acc_stderr,none": 0.029614323690456655}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542127}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2656449553001277, "acc_stderr,none": 0.01579430248788871}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.22549019607843138, "acc_stderr,none": 0.023929155517351294}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.25177304964539005, "acc_stderr,none": 0.025892151156709405}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.28308823529411764, "acc_stderr,none": 0.02736586113151381}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "mmlu_social_sciences": {"acc,none": 0.22424439389015274, "acc_stderr,none": 0.007511955204526759, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.22807017543859648, "acc_stderr,none": 0.03947152782669415}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.21212121212121213, "acc_stderr,none": 0.029126522834586832}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.20207253886010362, "acc_stderr,none": 0.02897908979429673}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.20512820512820512, "acc_stderr,none": 0.02047323317355198}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.02665353159671548}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.1963302752293578, "acc_stderr,none": 0.017030719339154368}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.2748091603053435, "acc_stderr,none": 0.03915345408847836}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.24673202614379086, "acc_stderr,none": 0.017440820367402507}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.3090909090909091, "acc_stderr,none": 0.044262946482000985}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.19591836734693877, "acc_stderr,none": 0.025409301953225678}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.02947525023601718}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_stem": {"acc,none": 0.23977164605137963, "acc_stderr,none": 0.007605249585131962, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.23, "acc_stderr,none": 0.04229525846816507}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.03820169914517904}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.18421052631578946, "acc_stderr,none": 0.031546980450822305}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2638888888888889, "acc_stderr,none": 0.03685651095897532}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.23, "acc_stderr,none": 0.04229525846816505}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.23, "acc_stderr,none": 0.04229525846816503}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.22, "acc_stderr,none": 0.04163331998932269}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.21568627450980393, "acc_stderr,none": 0.04092563958237654}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.26382978723404255, "acc_stderr,none": 0.02880998985410297}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.23448275862068965, "acc_stderr,none": 0.035306258743465914}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.22486772486772486, "acc_stderr,none": 0.021502096078229137}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.1967741935483871, "acc_stderr,none": 0.022616409420742015}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.23645320197044334, "acc_stderr,none": 0.029896114291733552}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.026719240783712166}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.271523178807947, "acc_stderr,none": 0.03631329803969655}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.21296296296296297, "acc_stderr,none": 0.02792096314799367}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.30357142857142855, "acc_stderr,none": 0.04364226155841044}, "sciq": {"alias": "sciq", "acc,none": 0.881, "acc_stderr,none": 0.010244215145336666, "acc_norm,none": 0.828, "acc_norm_stderr,none": 0.011939788882495321}}
|
| 5 |
+
{"created_at": "2026-01-24T22:49:55.047993", "global_step": 150000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.19574119574119575, "acc_stderr,none": 0.011359497363584393}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4358693487353117, "acc_stderr,none": 0.004948567856373875, "acc_norm,none": 0.5641306512646883, "acc_norm_stderr,none": 0.004948567856373853}, "mmlu": {"acc,none": 0.2444096282580829, "acc_stderr,none": 0.003622320623165458, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.23910733262486716, "acc_stderr,none": 0.006217014936246563, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.30952380952380953, "acc_stderr,none": 0.041349130183033156}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.19393939393939394, "acc_stderr,none": 0.0308741451365621}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.22549019607843138, "acc_stderr,none": 0.029331162294251728}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.270042194092827, "acc_stderr,none": 0.028900721906293426}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2231404958677686, "acc_stderr,none": 0.03800754475228733}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.04236511258094632}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.22085889570552147, "acc_stderr,none": 0.032591773927421776}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.2514450867052023, "acc_stderr,none": 0.023357365785874037}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.2424581005586592, "acc_stderr,none": 0.014333522059217892}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.17684887459807075, "acc_stderr,none": 0.02167005888551079}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.25308641975308643, "acc_stderr,none": 0.024191808600713}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.23989569752281617, "acc_stderr,none": 0.010906282617981648}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.2573099415204678, "acc_stderr,none": 0.03352799844161865}, "mmlu_other": {"acc,none": 0.2658513035082073, "acc_stderr,none": 0.007908370085376043, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.32, "acc_stderr,none": 0.04688261722621505}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.23018867924528302, "acc_stderr,none": 0.02590789712240817}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.18497109826589594, "acc_stderr,none": 0.02960562398177122}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.22, "acc_stderr,none": 0.04163331998932269}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.34977578475336324, "acc_stderr,none": 0.03200736719484503}, "mmlu_management": {"alias": " - management", "acc,none": 0.27184466019417475, "acc_stderr,none": 0.044052680241409216}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2948717948717949, "acc_stderr,none": 0.02987257770889117}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.33, "acc_stderr,none": 0.04725815626252604}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2541507024265645, "acc_stderr,none": 0.015569254692045778}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.24183006535947713, "acc_stderr,none": 0.024518195641879334}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.25886524822695034, "acc_stderr,none": 0.026129572527180848}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.25735294117647056, "acc_stderr,none": 0.026556519470041517}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.03664314777288086}, "mmlu_social_sciences": {"acc,none": 0.22749431264218395, "acc_stderr,none": 0.007554034989667286, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.23684210526315788, "acc_stderr,none": 0.039994238792813365}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.18686868686868688, "acc_stderr,none": 0.02777253333421899}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.21243523316062177, "acc_stderr,none": 0.029519282616817254}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.21794871794871795, "acc_stderr,none": 0.02093244577446317}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.026653531596715484}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.2, "acc_stderr,none": 0.01714985851425093}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.2900763358778626, "acc_stderr,none": 0.03980066246467765}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.24509803921568626, "acc_stderr,none": 0.017401816711427636}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2727272727272727, "acc_stderr,none": 0.04265792110940588}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.21224489795918366, "acc_stderr,none": 0.026176967197866767}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.24378109452736318, "acc_stderr,none": 0.03036049015401466}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_stem": {"acc,none": 0.24770060260069776, "acc_stderr,none": 0.007688064510829577, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.24, "acc_stderr,none": 0.042923469599092816}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.2962962962962963, "acc_stderr,none": 0.03944624162501116}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.19736842105263158, "acc_stderr,none": 0.03238981601699397}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.03476590104304134}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.26, "acc_stderr,none": 0.0440844002276808}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.24, "acc_stderr,none": 0.04292346959909282}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.23529411764705882, "acc_stderr,none": 0.042207736591714534}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.28, "acc_stderr,none": 0.045126085985421276}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.28936170212765955, "acc_stderr,none": 0.02964400657700962}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2896551724137931, "acc_stderr,none": 0.03780019230438014}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.02141168439369419}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.2129032258064516, "acc_stderr,none": 0.023287665127268525}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.2660098522167488, "acc_stderr,none": 0.03108982600293753}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.26, "acc_stderr,none": 0.0440844002276808}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.027309140588230193}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.26490066225165565, "acc_stderr,none": 0.03603038545360382}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.18981481481481483, "acc_stderr,none": 0.026744714834691943}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04287858751340456}, "sciq": {"alias": "sciq", "acc,none": 0.885, "acc_stderr,none": 0.010093407594904612, "acc_norm,none": 0.834, "acc_norm_stderr,none": 0.011772110370812192}}
|