craffel HF Staff commited on
Commit
35ddbe2
·
verified ·
1 Parent(s): 729c47d

Upload nemotron_actual_1T_exp_7B/metrics.eval.jsonl with huggingface_hub

Browse files
nemotron_actual_1T_exp_7B/metrics.eval.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"created_at": "2026-01-23T02:34:51.624360", "global_step": 30000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.22113022113022113, "acc_stderr,none": 0.011881644696037884}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.48994224258115915, "acc_stderr,none": 0.004988771791854507, "acc_norm,none": 0.6530571599283012, "acc_norm_stderr,none": 0.004750245757533306}, "mmlu": {"acc,none": 0.2833641931348811, "acc_stderr,none": 0.0037911244737802922, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.28012752391073326, "acc_stderr,none": 0.006538358825769817, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.03512207412302052}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.0368105086916155}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.2549019607843137, "acc_stderr,none": 0.030587591351604246}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.27848101265822783, "acc_stderr,none": 0.029178682304842548}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.30578512396694213, "acc_stderr,none": 0.04205953933884121}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.3055555555555556, "acc_stderr,none": 0.044531975073749834}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.3128834355828221, "acc_stderr,none": 0.03642914578292406}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.3468208092485549, "acc_stderr,none": 0.02562472399403046}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.24692737430167597, "acc_stderr,none": 0.014422292204808857}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.3086816720257235, "acc_stderr,none": 0.026236965881153252}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.3055555555555556, "acc_stderr,none": 0.025630824975621344}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.2711864406779661, "acc_stderr,none": 0.011354581451622985}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.2807017543859649, "acc_stderr,none": 0.034462962170884265}, "mmlu_other": {"acc,none": 0.2851625362085613, "acc_stderr,none": 0.008063667256107522, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542128}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.2188679245283019, "acc_stderr,none": 0.025447863825108608}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2543352601156069, "acc_stderr,none": 0.0332055644308557}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.39, "acc_stderr,none": 0.04902071300001975}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.3004484304932735, "acc_stderr,none": 0.030769352008229143}, "mmlu_management": {"alias": " - management", "acc,none": 0.2621359223300971, "acc_stderr,none": 0.04354631077260595}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.29914529914529914, "acc_stderr,none": 0.02999695185834947}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.30395913154533843, "acc_stderr,none": 0.016448321686769046}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.33986928104575165, "acc_stderr,none": 0.027121956071388852}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.2730496453900709, "acc_stderr,none": 0.026577860943307857}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.17647058823529413, "acc_stderr,none": 0.023157468308559356}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3674698795180723, "acc_stderr,none": 0.03753267402120574}, "mmlu_social_sciences": {"acc,none": 0.28599285017874554, "acc_stderr,none": 0.008105534463476602, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2807017543859649, "acc_stderr,none": 0.042270544512322004}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.21717171717171718, "acc_stderr,none": 0.029376616484945637}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.2538860103626943, "acc_stderr,none": 0.03141024780565317}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.24102564102564103, "acc_stderr,none": 0.021685546665333188}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.25630252100840334, "acc_stderr,none": 0.02835962087053395}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.25688073394495414, "acc_stderr,none": 0.018732492928342455}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.3511450381679389, "acc_stderr,none": 0.0418644516301375}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.2826797385620915, "acc_stderr,none": 0.01821726955205345}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2909090909090909, "acc_stderr,none": 0.04350271442923243}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.40408163265306124, "acc_stderr,none": 0.03141470802586589}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.3582089552238806, "acc_stderr,none": 0.03390393042268815}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.39, "acc_stderr,none": 0.04902071300001975}, "mmlu_stem": {"acc,none": 0.28385664446558834, "acc_stderr,none": 0.008010206632461795, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.32, "acc_stderr,none": 0.04688261722621505}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.2814814814814815, "acc_stderr,none": 0.038850042458002526}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.3157894736842105, "acc_stderr,none": 0.03782728980865471}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2708333333333333, "acc_stderr,none": 0.037161774375660164}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.24, "acc_stderr,none": 0.04292346959909284}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.27, "acc_stderr,none": 0.044619604333847394}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.22, "acc_stderr,none": 0.04163331998932269}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.24509803921568626, "acc_stderr,none": 0.04280105837364395}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.36, "acc_stderr,none": 0.048241815132442176}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.3446808510638298, "acc_stderr,none": 0.031068985963122145}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.38620689655172413, "acc_stderr,none": 0.04057324734419034}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.24603174603174602, "acc_stderr,none": 0.022182037202948368}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.026302774983517418}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.0317852971064275}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.36, "acc_stderr,none": 0.048241815132442176}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.02696242432507383}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.25165562913907286, "acc_stderr,none": 0.035433042343899844}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.20833333333333334, "acc_stderr,none": 0.02769691071309394}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.25892857142857145, "acc_stderr,none": 0.04157751539865629}, "sciq": {"alias": "sciq", "acc,none": 0.938, "acc_stderr,none": 0.0076298239962803134, "acc_norm,none": 0.916, "acc_norm_stderr,none": 0.008776162089491089}}
2
+ {"created_at": "2026-01-24T07:05:24.146000", "global_step": 60000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.4381654381654382, "acc_stderr,none": 0.014205069718489775}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.5174268074088827, "acc_stderr,none": 0.004986749760948699, "acc_norm,none": 0.6896036646086438, "acc_norm_stderr,none": 0.004617103280372021}, "mmlu": {"acc,none": 0.4093433983762997, "acc_stderr,none": 0.0040623071036338465, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.39086078639744953, "acc_stderr,none": 0.006976372932942577, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04040610178208841}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.509090909090909, "acc_stderr,none": 0.03903698647748441}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.5245098039215687, "acc_stderr,none": 0.03505093194348798}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.5485232067510548, "acc_stderr,none": 0.03239360017397471}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.5702479338842975, "acc_stderr,none": 0.04519082021319772}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.4444444444444444, "acc_stderr,none": 0.04803752235190192}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.44785276073619634, "acc_stderr,none": 0.03906947479456602}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.41329479768786126, "acc_stderr,none": 0.026511261369409244}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.26145251396648045, "acc_stderr,none": 0.014696599650364555}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.4694533762057878, "acc_stderr,none": 0.02834504586484068}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.4660493827160494, "acc_stderr,none": 0.027756535257347663}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.34028683181225555, "acc_stderr,none": 0.01210121761022378}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.5614035087719298, "acc_stderr,none": 0.0380579750559046}, "mmlu_other": {"acc,none": 0.4486643064048922, "acc_stderr,none": 0.008779692015665964, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.41, "acc_stderr,none": 0.049431107042371025}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.42641509433962266, "acc_stderr,none": 0.030437794342983042}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.36416184971098264, "acc_stderr,none": 0.036690724774169084}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.38, "acc_stderr,none": 0.048783173121456316}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.515695067264574, "acc_stderr,none": 0.03354126575420809}, "mmlu_management": {"alias": " - management", "acc,none": 0.4854368932038835, "acc_stderr,none": 0.04948637324026637}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.6538461538461539, "acc_stderr,none": 0.031166957367235907}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.45, "acc_stderr,none": 0.049999999999999996}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.5019157088122606, "acc_stderr,none": 0.017879832259026677}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.48366013071895425, "acc_stderr,none": 0.028614624752805413}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.32978723404255317, "acc_stderr,none": 0.0280459469420424}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.28308823529411764, "acc_stderr,none": 0.02736586113151381}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.39156626506024095, "acc_stderr,none": 0.03799857454479636}, "mmlu_social_sciences": {"acc,none": 0.4744881377965551, "acc_stderr,none": 0.00891768232553974, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2543859649122807, "acc_stderr,none": 0.040969851398436716}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.47474747474747475, "acc_stderr,none": 0.035578062450873145}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.5284974093264249, "acc_stderr,none": 0.036025735712884414}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.4076923076923077, "acc_stderr,none": 0.024915243985987847}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.40336134453781514, "acc_stderr,none": 0.03186608121408832}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.5651376146788991, "acc_stderr,none": 0.021254631465609266}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.5038167938931297, "acc_stderr,none": 0.043851623256015534}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.43137254901960786, "acc_stderr,none": 0.02003639376835263}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.5181818181818182, "acc_stderr,none": 0.04785964010794916}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.47346938775510206, "acc_stderr,none": 0.03196412734523272}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.5621890547263682, "acc_stderr,none": 0.035080801121998406}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.56, "acc_stderr,none": 0.04988876515698589}, "mmlu_stem": {"acc,none": 0.33460196638122425, "acc_stderr,none": 0.00826650947161152, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.43703703703703706, "acc_stderr,none": 0.04284958639753399}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.3684210526315789, "acc_stderr,none": 0.03925523381052932}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.3819444444444444, "acc_stderr,none": 0.040629907841466674}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.41, "acc_stderr,none": 0.049431107042371025}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.32, "acc_stderr,none": 0.04688261722621505}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.20588235294117646, "acc_stderr,none": 0.04023382273617746}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.54, "acc_stderr,none": 0.05009082659620332}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.4, "acc_stderr,none": 0.03202563076101736}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.42758620689655175, "acc_stderr,none": 0.04122737111370333}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.26455026455026454, "acc_stderr,none": 0.0227174678977086}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.4645161290322581, "acc_stderr,none": 0.028372287797962956}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.270935960591133, "acc_stderr,none": 0.031270907132976984}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.39, "acc_stderr,none": 0.04902071300001975}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2074074074074074, "acc_stderr,none": 0.024720713193952134}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.24503311258278146, "acc_stderr,none": 0.03511807571804725}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.25462962962962965, "acc_stderr,none": 0.029711275860005337}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.35714285714285715, "acc_stderr,none": 0.04547960999764376}, "sciq": {"alias": "sciq", "acc,none": 0.94, "acc_stderr,none": 0.00751375115747492, "acc_norm,none": 0.931, "acc_norm_stderr,none": 0.008018934050315167}}
3
+ {"created_at": "2026-01-25T12:00:37.036354", "global_step": 90000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.5413595413595413, "acc_stderr,none": 0.01426589962395503}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.5429197371041625, "acc_stderr,none": 0.004971364031062592, "acc_norm,none": 0.7235610436168094, "acc_norm_stderr,none": 0.004463224445470985}, "mmlu": {"acc,none": 0.4791340264919527, "acc_stderr,none": 0.004079826221154932, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.444845908607864, "acc_stderr,none": 0.0069622509886378536, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.46825396825396826, "acc_stderr,none": 0.04463112720677172}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.6424242424242425, "acc_stderr,none": 0.037425970438065864}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.6078431372549019, "acc_stderr,none": 0.03426712349247273}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.6413502109704642, "acc_stderr,none": 0.031219569445301857}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.6528925619834711, "acc_stderr,none": 0.04345724570292534}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.5462962962962963, "acc_stderr,none": 0.04812917324536823}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.558282208588957, "acc_stderr,none": 0.03901591825836184}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.5549132947976878, "acc_stderr,none": 0.026756255129663765}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.24022346368715083, "acc_stderr,none": 0.014288343803925305}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.5691318327974276, "acc_stderr,none": 0.02812534098397271}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.5, "acc_stderr,none": 0.02782074420373286}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.37027379400260757, "acc_stderr,none": 0.012332930781256734}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.6374269005847953, "acc_stderr,none": 0.036871306155620606}, "mmlu_other": {"acc,none": 0.5317026070164146, "acc_stderr,none": 0.008773248778937662, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.49, "acc_stderr,none": 0.05024183937956912}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.5660377358490566, "acc_stderr,none": 0.030503292013342603}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.48554913294797686, "acc_stderr,none": 0.03810871630454764}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.5560538116591929, "acc_stderr,none": 0.03334625674242728}, "mmlu_management": {"alias": " - management", "acc,none": 0.6699029126213593, "acc_stderr,none": 0.04656147110012349}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.6923076923076923, "acc_stderr,none": 0.03023638994217309}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.49, "acc_stderr,none": 0.05024183937956913}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.6104725415070242, "acc_stderr,none": 0.01743808255626459}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.5261437908496732, "acc_stderr,none": 0.028590752958852394}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.32978723404255317, "acc_stderr,none": 0.028045946942042398}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.4963235294117647, "acc_stderr,none": 0.030372015885428195}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.40963855421686746, "acc_stderr,none": 0.038284011150790206}, "mmlu_social_sciences": {"acc,none": 0.5544361390965226, "acc_stderr,none": 0.008816771233976384, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2894736842105263, "acc_stderr,none": 0.04266339443159394}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.5808080808080808, "acc_stderr,none": 0.03515520728670417}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.6010362694300518, "acc_stderr,none": 0.03533999094065696}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.4846153846153846, "acc_stderr,none": 0.02533900301010651}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.5168067226890757, "acc_stderr,none": 0.03246013680375308}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.6623853211009174, "acc_stderr,none": 0.02027526598663891}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.5801526717557252, "acc_stderr,none": 0.04328577215262971}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.47058823529411764, "acc_stderr,none": 0.02019280827143379}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.5363636363636364, "acc_stderr,none": 0.047764491623961985}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.5673469387755102, "acc_stderr,none": 0.031717528240626645}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.6965174129353234, "acc_stderr,none": 0.03251006816458618}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.67, "acc_stderr,none": 0.04725815626252609}, "mmlu_stem": {"acc,none": 0.40501110053916906, "acc_stderr,none": 0.008568999338569704, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.45925925925925926, "acc_stderr,none": 0.04304979692464243}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.5197368421052632, "acc_stderr,none": 0.040657710025626036}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.4513888888888889, "acc_stderr,none": 0.04161402398403279}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.47, "acc_stderr,none": 0.050161355804659205}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117317}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.27450980392156865, "acc_stderr,none": 0.044405219061793275}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.55, "acc_stderr,none": 0.049999999999999996}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.43829787234042555, "acc_stderr,none": 0.032436186361081004}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.4896551724137931, "acc_stderr,none": 0.041657747757287644}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.34656084656084657, "acc_stderr,none": 0.024508777521028424}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.6, "acc_stderr,none": 0.027869320571664625}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.3842364532019704, "acc_stderr,none": 0.0342239856565755}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.43, "acc_stderr,none": 0.04975698519562428}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2518518518518518, "acc_stderr,none": 0.02646611753895991}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.31788079470198677, "acc_stderr,none": 0.038020397601079024}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.4212962962962963, "acc_stderr,none": 0.03367462138896078}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.32142857142857145, "acc_stderr,none": 0.0443280405529152}, "sciq": {"alias": "sciq", "acc,none": 0.957, "acc_stderr,none": 0.006418114379799741, "acc_norm,none": 0.946, "acc_norm_stderr,none": 0.007150883521295434}}
4
+ {"created_at": "2026-01-26T16:34:04.253288", "global_step": 120000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.5151515151515151, "acc_stderr,none": 0.0143083843238111}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.5707030472017527, "acc_stderr,none": 0.004939642460172587, "acc_norm,none": 0.7626966739693288, "acc_norm_stderr,none": 0.004245602744443542}, "mmlu": {"acc,none": 0.5225751317476143, "acc_stderr,none": 0.003982472226269284, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.4729011689691817, "acc_stderr,none": 0.006823781782444833, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.0404061017820884}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.7090909090909091, "acc_stderr,none": 0.03546563019624337}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.6911764705882353, "acc_stderr,none": 0.03242661719827218}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.7172995780590717, "acc_stderr,none": 0.029312814153955914}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.743801652892562, "acc_stderr,none": 0.03984979653302872}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.5740740740740741, "acc_stderr,none": 0.047803436269367894}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.6319018404907976, "acc_stderr,none": 0.03789213935838396}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.5982658959537572, "acc_stderr,none": 0.02639410417764363}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.23798882681564246, "acc_stderr,none": 0.014242630070574885}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.5787781350482315, "acc_stderr,none": 0.028043399858210624}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.6327160493827161, "acc_stderr,none": 0.02682280175950789}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.3748370273794003, "acc_stderr,none": 0.012363652467551936}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.7368421052631579, "acc_stderr,none": 0.03377310252209205}, "mmlu_other": {"acc,none": 0.5864177663340844, "acc_stderr,none": 0.008534195465865826, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.52, "acc_stderr,none": 0.050211673156867795}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.569811320754717, "acc_stderr,none": 0.030471445867183235}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.5028901734104047, "acc_stderr,none": 0.03812400565974833}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.33, "acc_stderr,none": 0.04725815626252604}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.6278026905829597, "acc_stderr,none": 0.03244305283008731}, "mmlu_management": {"alias": " - management", "acc,none": 0.7961165048543689, "acc_stderr,none": 0.03989139859531771}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.8205128205128205, "acc_stderr,none": 0.025140935950335445}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.54, "acc_stderr,none": 0.05009082659620333}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.6909323116219668, "acc_stderr,none": 0.016524988919702194}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.5882352941176471, "acc_stderr,none": 0.02818059632825929}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.028999080904806178}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.47058823529411764, "acc_stderr,none": 0.03032024326500413}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.4457831325301205, "acc_stderr,none": 0.03869543323472101}, "mmlu_social_sciences": {"acc,none": 0.6321091972700682, "acc_stderr,none": 0.00842894298430653, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.30701754385964913, "acc_stderr,none": 0.0433913832257986}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.696969696969697, "acc_stderr,none": 0.03274287914026868}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.7875647668393783, "acc_stderr,none": 0.02951928261681721}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.5025641025641026, "acc_stderr,none": 0.025350672979412195}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.5798319327731093, "acc_stderr,none": 0.03206183783236152}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.7467889908256881, "acc_stderr,none": 0.018644073041375046}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.6564885496183206, "acc_stderr,none": 0.041649760719448786}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.5392156862745098, "acc_stderr,none": 0.020165523313907904}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.5818181818181818, "acc_stderr,none": 0.04724577405731571}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.6612244897959184, "acc_stderr,none": 0.030299506562154185}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.8159203980099502, "acc_stderr,none": 0.027403859410786862}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.73, "acc_stderr,none": 0.044619604333847394}, "mmlu_stem": {"acc,none": 0.426895020615287, "acc_stderr,none": 0.008514837335164957, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.26, "acc_stderr,none": 0.044084400227680794}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.5037037037037037, "acc_stderr,none": 0.04319223625811331}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.5921052631578947, "acc_stderr,none": 0.039993097127774734}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.5625, "acc_stderr,none": 0.04148415739394154}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.38, "acc_stderr,none": 0.04878317312145632}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.45, "acc_stderr,none": 0.05}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.30392156862745096, "acc_stderr,none": 0.04576665403207762}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.76, "acc_stderr,none": 0.04292346959909283}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.39574468085106385, "acc_stderr,none": 0.031967586978353627}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.4689655172413793, "acc_stderr,none": 0.04158632762097828}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.3386243386243386, "acc_stderr,none": 0.024373197867983056}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.6451612903225806, "acc_stderr,none": 0.027218889773308753}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.39901477832512317, "acc_stderr,none": 0.03445487686264716}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.49, "acc_stderr,none": 0.05024183937956912}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.24074074074074073, "acc_stderr,none": 0.0260671592222758}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.33112582781456956, "acc_stderr,none": 0.038425817186598696}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.36574074074074076, "acc_stderr,none": 0.03284738857647207}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.41964285714285715, "acc_stderr,none": 0.04684099321077106}, "sciq": {"alias": "sciq", "acc,none": 0.961, "acc_stderr,none": 0.006125072776426136, "acc_norm,none": 0.944, "acc_norm_stderr,none": 0.0072744014816970536}}
5
+ {"created_at": "2026-01-27T20:45:20.043281", "global_step": 150000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.5986895986895987, "acc_stderr,none": 0.014033343288790847}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.5915156343357897, "acc_stderr,none": 0.00490548949400508, "acc_norm,none": 0.7803226448914559, "acc_norm_stderr,none": 0.004131818797713887}, "mmlu": {"acc,none": 0.568651189289275, "acc_stderr,none": 0.003919796777450193, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.5107332624867162, "acc_stderr,none": 0.006740673989280729, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.3492063492063492, "acc_stderr,none": 0.04263906892795131}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.7333333333333333, "acc_stderr,none": 0.03453131801885415}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.7303921568627451, "acc_stderr,none": 0.03114557065948678}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.7763713080168776, "acc_stderr,none": 0.027123298205229966}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.768595041322314, "acc_stderr,none": 0.03849856098794088}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.6481481481481481, "acc_stderr,none": 0.046166311118017125}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.6932515337423313, "acc_stderr,none": 0.036230899157241474}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.653179190751445, "acc_stderr,none": 0.025624723994030457}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.24022346368715083, "acc_stderr,none": 0.014288343803925305}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.6655948553054662, "acc_stderr,none": 0.026795422327893934}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.6635802469135802, "acc_stderr,none": 0.02628973494595293}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.408735332464146, "acc_stderr,none": 0.012555701346703384}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.8128654970760234, "acc_stderr,none": 0.029913127232368036}, "mmlu_other": {"acc,none": 0.647891857096878, "acc_stderr,none": 0.008288086901916963, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.62, "acc_stderr,none": 0.04878317312145632}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.6415094339622641, "acc_stderr,none": 0.029514703583981772}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.6127167630057804, "acc_stderr,none": 0.03714325906302064}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.38, "acc_stderr,none": 0.048783173121456316}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.6457399103139013, "acc_stderr,none": 0.03210062154134986}, "mmlu_management": {"alias": " - management", "acc,none": 0.7766990291262136, "acc_stderr,none": 0.04123553189891431}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.8333333333333334, "acc_stderr,none": 0.024414947304543688}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.57, "acc_stderr,none": 0.049756985195624284}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.776500638569604, "acc_stderr,none": 0.01489723522945071}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.6339869281045751, "acc_stderr,none": 0.02758281141515962}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.4432624113475177, "acc_stderr,none": 0.029634838473766006}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.5477941176470589, "acc_stderr,none": 0.030233758551596435}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.5120481927710844, "acc_stderr,none": 0.03891364495835817}, "mmlu_social_sciences": {"acc,none": 0.6766330841728957, "acc_stderr,none": 0.008165674800207875, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.2982456140350877, "acc_stderr,none": 0.04303684033537317}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.7171717171717171, "acc_stderr,none": 0.03208779558786752}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.8031088082901554, "acc_stderr,none": 0.02869787397186068}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.5794871794871795, "acc_stderr,none": 0.02502861027671086}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.6470588235294118, "acc_stderr,none": 0.031041941304059278}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.8036697247706422, "acc_stderr,none": 0.017030719339154364}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.7480916030534351, "acc_stderr,none": 0.03807387116306086}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.576797385620915, "acc_stderr,none": 0.019987809769482064}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.6363636363636364, "acc_stderr,none": 0.04607582090719976}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.6938775510204082, "acc_stderr,none": 0.029504896454595957}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.8159203980099502, "acc_stderr,none": 0.027403859410786845}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.78, "acc_stderr,none": 0.04163331998932261}, "mmlu_stem": {"acc,none": 0.4716143355534412, "acc_stderr,none": 0.008565947331469707, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.28, "acc_stderr,none": 0.045126085985421276}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.5259259259259259, "acc_stderr,none": 0.04313531696750574}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.6644736842105263, "acc_stderr,none": 0.038424985593952694}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.6666666666666666, "acc_stderr,none": 0.039420826399272135}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.37, "acc_stderr,none": 0.04852365870939099}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.44, "acc_stderr,none": 0.049888765156985884}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.37, "acc_stderr,none": 0.04852365870939099}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.3235294117647059, "acc_stderr,none": 0.046550104113196177}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.72, "acc_stderr,none": 0.04512608598542128}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.5361702127659574, "acc_stderr,none": 0.03260038511835771}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.4827586206896552, "acc_stderr,none": 0.04164188720169377}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.35714285714285715, "acc_stderr,none": 0.024677862841332783}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.6838709677419355, "acc_stderr,none": 0.026450874489042767}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.47783251231527096, "acc_stderr,none": 0.03514528562175007}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.58, "acc_stderr,none": 0.049604496374885836}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.027309140588230182}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.32450331125827814, "acc_stderr,none": 0.03822746937658753}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.4305555555555556, "acc_stderr,none": 0.03376922151252336}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.4732142857142857, "acc_stderr,none": 0.047389751192741546}, "sciq": {"alias": "sciq", "acc,none": 0.963, "acc_stderr,none": 0.005972157622389646, "acc_norm,none": 0.939, "acc_norm_stderr,none": 0.007572076091557424}}