Upload gpt2/metrics.eval.jsonl with huggingface_hub
Browse files- gpt2/metrics.eval.jsonl +3 -0
gpt2/metrics.eval.jsonl
CHANGED
|
@@ -45,3 +45,6 @@
|
|
| 45 |
{"created_at": "2025-08-26T17:32:36.788652", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.32081911262798635, "acc_stderr,none": 0.01364094309194653, "acc_norm,none": 0.3464163822525597, "acc_norm_stderr,none": 0.01390501118006324}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.625, "acc_norm_stderr,none": 0.009933992677987828}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168465, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168465}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356785, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356785}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.01417574247439196, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.01417574247439196}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132634, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132634}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41346345349531965, "acc_stderr,none": 0.0049144805345337165, "acc_norm,none": 0.5302728540131448, "acc_norm_stderr,none": 0.0049806272871475744}, "include_base_44_chinese": {"acc,none": 0.23853211009174313, "acc_stderr,none": 0.018253543111233585, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133514}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.0432478576664078}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.01853490887549456, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.11428571428571428, "acc_stderr,none": 0.05456364060755607}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050468}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.036577207065409116}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018693533371887432, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.7149075081610446, "acc_stderr,none": 0.010533270588738933, "acc_norm,none": 0.7100108813928183, "acc_norm_stderr,none": 0.010586899128169328}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5100401606425703, "acc_stderr,none": 0.010020052116889137}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46224899598393576, "acc_stderr,none": 0.009993466360872783}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}}
|
| 46 |
{"created_at": "2025-08-26T19:25:48.149463", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3216723549488055, "acc_stderr,none": 0.013650488084494162, "acc_norm,none": 0.34044368600682595, "acc_norm_stderr,none": 0.013847460518892976}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6826599326599326, "acc_stderr,none": 0.00955064834394777, "acc_norm,none": 0.6258417508417509, "acc_norm_stderr,none": 0.009929516948977625}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341314, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341314}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356812, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356812}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683047, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683047}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433905, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433905}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884507, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884507}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41266679944234214, "acc_stderr,none": 0.004913076844433762, "acc_norm,none": 0.533559051981677, "acc_norm_stderr,none": 0.004978529642140934}, "include_base_44_chinese": {"acc,none": 0.26788990825688075, "acc_stderr,none": 0.018865135158740404, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05083285677753486}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640778}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3448275862068966, "acc_stderr,none": 0.05125421389342353}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133513}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.019072515820800818, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018616624984384385, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.7105549510337323, "acc_stderr,none": 0.010581014740675602, "acc_norm,none": 0.7083786724700761, "acc_norm_stderr,none": 0.01060444152742878}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5044176706827309, "acc_stderr,none": 0.010021681681769335}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4610441767068273, "acc_stderr,none": 0.009991608448389063}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}}
|
| 47 |
{"created_at": "2025-08-27T05:05:32.699170", "global_step": 94000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.01362169611917331, "acc_norm,none": 0.34215017064846415, "acc_norm_stderr,none": 0.013864152159177278}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6835016835016835, "acc_stderr,none": 0.009543851857323888, "acc_norm,none": 0.6317340067340067, "acc_norm_stderr,none": 0.00989728620901089}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.01460893383616847, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.01460893383616847}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.01464948638526213, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.01464948638526213}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356791, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356791}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.01431110796368305, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.01431110796368305}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41266679944234214, "acc_stderr,none": 0.004913076844433765, "acc_norm,none": 0.5345548695478988, "acc_norm_stderr,none": 0.00497785116190439}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.018347969547069576, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640782}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.01830835184282819, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.08571428571428572, "acc_stderr,none": 0.048009602880960345}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484004}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964298}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018302956462663326, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.7132752992383025, "acc_stderr,none": 0.010551314503108066, "acc_norm,none": 0.7072905331882481, "acc_norm_stderr,none": 0.010616044462393094}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.506425702811245, "acc_stderr,none": 0.010021245217159377}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4646586345381526, "acc_stderr,none": 0.009997006138567233}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.00948525020851688}}
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
{"created_at": "2025-08-26T17:32:36.788652", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.32081911262798635, "acc_stderr,none": 0.01364094309194653, "acc_norm,none": 0.3464163822525597, "acc_norm_stderr,none": 0.01390501118006324}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.625, "acc_norm_stderr,none": 0.009933992677987828}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168465, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168465}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356785, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356785}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.01417574247439196, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.01417574247439196}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132634, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132634}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41346345349531965, "acc_stderr,none": 0.0049144805345337165, "acc_norm,none": 0.5302728540131448, "acc_norm_stderr,none": 0.0049806272871475744}, "include_base_44_chinese": {"acc,none": 0.23853211009174313, "acc_stderr,none": 0.018253543111233585, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133514}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.0432478576664078}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.01853490887549456, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.11428571428571428, "acc_stderr,none": 0.05456364060755607}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050468}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.036577207065409116}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018693533371887432, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.7149075081610446, "acc_stderr,none": 0.010533270588738933, "acc_norm,none": 0.7100108813928183, "acc_norm_stderr,none": 0.010586899128169328}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5100401606425703, "acc_stderr,none": 0.010020052116889137}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46224899598393576, "acc_stderr,none": 0.009993466360872783}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}}
|
| 46 |
{"created_at": "2025-08-26T19:25:48.149463", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3216723549488055, "acc_stderr,none": 0.013650488084494162, "acc_norm,none": 0.34044368600682595, "acc_norm_stderr,none": 0.013847460518892976}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6826599326599326, "acc_stderr,none": 0.00955064834394777, "acc_norm,none": 0.6258417508417509, "acc_norm_stderr,none": 0.009929516948977625}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341314, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341314}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356812, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356812}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683047, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683047}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433905, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433905}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884507, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884507}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41266679944234214, "acc_stderr,none": 0.004913076844433762, "acc_norm,none": 0.533559051981677, "acc_norm_stderr,none": 0.004978529642140934}, "include_base_44_chinese": {"acc,none": 0.26788990825688075, "acc_stderr,none": 0.018865135158740404, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05083285677753486}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640778}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3448275862068966, "acc_stderr,none": 0.05125421389342353}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133513}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.019072515820800818, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018616624984384385, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.7105549510337323, "acc_stderr,none": 0.010581014740675602, "acc_norm,none": 0.7083786724700761, "acc_norm_stderr,none": 0.01060444152742878}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5044176706827309, "acc_stderr,none": 0.010021681681769335}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4610441767068273, "acc_stderr,none": 0.009991608448389063}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}}
|
| 47 |
{"created_at": "2025-08-27T05:05:32.699170", "global_step": 94000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.01362169611917331, "acc_norm,none": 0.34215017064846415, "acc_norm_stderr,none": 0.013864152159177278}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6835016835016835, "acc_stderr,none": 0.009543851857323888, "acc_norm,none": 0.6317340067340067, "acc_norm_stderr,none": 0.00989728620901089}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.01460893383616847, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.01460893383616847}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.01464948638526213, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.01464948638526213}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356791, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356791}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.01431110796368305, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.01431110796368305}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41266679944234214, "acc_stderr,none": 0.004913076844433765, "acc_norm,none": 0.5345548695478988, "acc_norm_stderr,none": 0.00497785116190439}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.018347969547069576, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640782}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.01830835184282819, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.08571428571428572, "acc_stderr,none": 0.048009602880960345}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484004}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964298}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018302956462663326, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.7132752992383025, "acc_stderr,none": 0.010551314503108066, "acc_norm,none": 0.7072905331882481, "acc_norm_stderr,none": 0.010616044462393094}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.506425702811245, "acc_stderr,none": 0.010021245217159377}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4646586345381526, "acc_stderr,none": 0.009997006138567233}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.00948525020851688}}
|
| 48 |
+
{"created_at": "2025-08-27T10:25:51.709286", "global_step": 96000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.32764505119453924, "acc_stderr,none": 0.01371584794071934, "acc_norm,none": 0.34215017064846415, "acc_norm_stderr,none": 0.013864152159177278}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6872895622895623, "acc_stderr,none": 0.009512819491443735, "acc_norm,none": 0.6292087542087542, "acc_norm_stderr,none": 0.009911292822056921}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217909, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217909}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792654, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792654}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276202, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276202}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276202, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276202}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41266679944234214, "acc_stderr,none": 0.004913076844433761, "acc_norm,none": 0.5358494323839873, "acc_norm_stderr,none": 0.00497693933324009}, "include_base_44_chinese": {"acc,none": 0.25688073394495414, "acc_stderr,none": 0.018758582781447833, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3103448275862069, "acc_stderr,none": 0.04988718850038747}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018368616910092304, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.08571428571428572, "acc_stderr,none": 0.048009602880960345}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2709677419354839, "acc_stderr,none": 0.03581556513964114}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018606462359235718, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689631}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7127312295973884, "acc_stderr,none": 0.010557291761528635, "acc_norm,none": 0.7089227421109902, "acc_norm_stderr,none": 0.010598612490942596}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5056224899598394, "acc_stderr,none": 0.010021439203777337}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4678714859437751, "acc_stderr,none": 0.010001361068173075}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274443}}
|
| 49 |
+
{"created_at": "2025-08-27T10:38:35.029799", "global_step": 98000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3199658703071672, "acc_stderr,none": 0.013631345807016193, "acc_norm,none": 0.3412969283276451, "acc_norm_stderr,none": 0.013855831287497724}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6856060606060606, "acc_stderr,none": 0.009526702423162907, "acc_norm,none": 0.6321548821548821, "acc_norm_stderr,none": 0.009894923464455186}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.014649486385262098, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.014649486385262098}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699817, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699817}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683041, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683041}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755677, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755677}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4129655447122087, "acc_stderr,none": 0.004913604782665852, "acc_norm,none": 0.534654451304521, "acc_norm_stderr,none": 0.00497778221758246}, "include_base_44_chinese": {"acc,none": 0.26055045871559634, "acc_stderr,none": 0.01889220090587809, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3103448275862069, "acc_stderr,none": 0.04988718850038747}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018191035635425172, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.05714285714285714, "acc_stderr,none": 0.03980745977252778}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799084}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018680557006555378, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.7159956474428727, "acc_stderr,none": 0.010521147542454222, "acc_norm,none": 0.7078346028291621, "acc_norm_stderr,none": 0.010610252174513658}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5092369477911647, "acc_stderr,none": 0.01002036253063136}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4682730923694779, "acc_stderr,none": 0.010001876146466703}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646757}}
|
| 50 |
+
{"created_at": "2025-08-27T10:45:40.898326", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3250853242320819, "acc_stderr,none": 0.013688147309729125, "acc_norm,none": 0.3387372013651877, "acc_norm_stderr,none": 0.01383056892797433}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6835016835016835, "acc_stderr,none": 0.009543851857323888, "acc_norm,none": 0.6342592592592593, "acc_norm_stderr,none": 0.009882988069418834}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254283, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254283}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.0141986348093082, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.0141986348093082}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132627, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132627}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308183, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308183}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4124676359290978, "acc_stderr,none": 0.004912723848944788, "acc_norm,none": 0.5366460864369648, "acc_norm_stderr,none": 0.004976361454341364}, "include_base_44_chinese": {"acc,none": 0.26055045871559634, "acc_stderr,none": 0.018873760231513624, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150304}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.28735632183908044, "acc_stderr,none": 0.048797477314965754}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.28735632183908044, "acc_stderr,none": 0.04879747731496573}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018493600697339158, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.05714285714285714, "acc_stderr,none": 0.03980745977252778}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31137724550898205, "acc_stderr,none": 0.03594016584565771}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2709677419354839, "acc_stderr,none": 0.03581556513964114}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.03253989433108519}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018806307830725472, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511116}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898186}, "piqa": {"alias": "piqa", "acc,none": 0.7159956474428727, "acc_stderr,none": 0.010521147542454224, "acc_norm,none": 0.7078346028291621, "acc_norm_stderr,none": 0.010610252174513658}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5068273092369477, "acc_stderr,none": 0.010021138522919162}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46626506024096387, "acc_stderr,none": 0.009999235684721613}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.00948525020851688}}
|