Upload google-bert-bert-base-multilingual-cased/metrics.eval.jsonl with huggingface_hub
Browse files
google-bert-bert-base-multilingual-cased/metrics.eval.jsonl
CHANGED
|
@@ -6,3 +6,5 @@
|
|
| 6 |
{"created_at": "2025-09-03T07:57:09.280741", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3037542662116041, "acc_stderr,none": 0.013438909184778768, "acc_norm,none": 0.3293515358361775, "acc_norm_stderr,none": 0.013734057652635473}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6561447811447811, "acc_stderr,none": 0.009746660584852445, "acc_norm,none": 0.5904882154882155, "acc_norm_stderr,none": 0.010090368160990062}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700177, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700177}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356793, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356793}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4001194981079466, "acc_stderr,none": 0.004889210628907949, "acc_norm,none": 0.5276837283409679, "acc_norm_stderr,none": 0.004982127315605207}, "include_base_44_chinese": {"acc,none": 0.20917431192660552, "acc_stderr,none": 0.01749303732528358, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018947752997512255, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387485}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.036125997314033945}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.03340463153945588}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.15075567228888181}, "include_base_44_turkish": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017780501685203974, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "piqa": {"alias": "piqa", "acc,none": 0.6926006528835691, "acc_stderr,none": 0.010765602506939073, "acc_norm,none": 0.7029379760609358, "acc_norm_stderr,none": 0.01066172540481479}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4835341365461847, "acc_stderr,none": 0.010016636930829975}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43052208835341366, "acc_stderr,none": 0.009924844537285525}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}}
|
| 7 |
{"created_at": "2025-09-04T05:11:31.722386", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3250853242320819, "acc_stderr,none": 0.013688147309729125, "acc_norm,none": 0.35580204778157, "acc_norm_stderr,none": 0.013990571137918757}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.6035353535353535, "acc_norm_stderr,none": 0.010037412763064522}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218195, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218195}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687963, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687963}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41127265484963155, "acc_stderr,none": 0.0049105884493300155, "acc_norm,none": 0.5483967337183828, "acc_norm_stderr,none": 0.004966351835028204}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.01848372282421882, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018569247333205148, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050467}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.03633254072705441}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01915219118227231, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237653}, "piqa": {"alias": "piqa", "acc,none": 0.7094668117519043, "acc_stderr,none": 0.010592765034696536, "acc_norm,none": 0.7127312295973884, "acc_norm_stderr,none": 0.010557291761528632}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5004016064257029, "acc_stderr,none": 0.010022069634353863}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4353413654618474, "acc_stderr,none": 0.009937920221480507}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512704}}
|
| 8 |
{"created_at": "2025-09-04T06:31:48.827890", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173314, "acc_norm,none": 0.34982935153583616, "acc_norm_stderr,none": 0.013936809212158284}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6771885521885522, "acc_stderr,none": 0.009593950220366744, "acc_norm,none": 0.6069023569023569, "acc_norm_stderr,none": 0.010022540618945305}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687961, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687961}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321819, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321819}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132635, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132635}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.425911173073093, "acc_stderr,none": 0.004934698012050244, "acc_norm,none": 0.5656243776140211, "acc_norm_stderr,none": 0.004946617138983514}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.01850104240213322, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.049360904959780114}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018306114689962162, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.22580645161290322, "acc_stderr,none": 0.03369244953981245}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018616234216771214, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7132752992383025, "acc_stderr,none": 0.01055131450310806, "acc_norm,none": 0.7170837867247007, "acc_norm_stderr,none": 0.010508949177489688}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5108433734939759, "acc_stderr,none": 0.01001971582448348}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44899598393574297, "acc_stderr,none": 0.00996979347724083}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287618}}
|
|
|
|
|
|
|
|
|
| 6 |
{"created_at": "2025-09-03T07:57:09.280741", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3037542662116041, "acc_stderr,none": 0.013438909184778768, "acc_norm,none": 0.3293515358361775, "acc_norm_stderr,none": 0.013734057652635473}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6561447811447811, "acc_stderr,none": 0.009746660584852445, "acc_norm,none": 0.5904882154882155, "acc_norm_stderr,none": 0.010090368160990062}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700177, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700177}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356793, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356793}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4001194981079466, "acc_stderr,none": 0.004889210628907949, "acc_norm,none": 0.5276837283409679, "acc_norm_stderr,none": 0.004982127315605207}, "include_base_44_chinese": {"acc,none": 0.20917431192660552, "acc_stderr,none": 0.01749303732528358, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018947752997512255, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387485}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.036125997314033945}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.03340463153945588}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.15075567228888181}, "include_base_44_turkish": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017780501685203974, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "piqa": {"alias": "piqa", "acc,none": 0.6926006528835691, "acc_stderr,none": 0.010765602506939073, "acc_norm,none": 0.7029379760609358, "acc_norm_stderr,none": 0.01066172540481479}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4835341365461847, "acc_stderr,none": 0.010016636930829975}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43052208835341366, "acc_stderr,none": 0.009924844537285525}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}}
|
| 7 |
{"created_at": "2025-09-04T05:11:31.722386", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3250853242320819, "acc_stderr,none": 0.013688147309729125, "acc_norm,none": 0.35580204778157, "acc_norm_stderr,none": 0.013990571137918757}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.6035353535353535, "acc_norm_stderr,none": 0.010037412763064522}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218195, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218195}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687963, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687963}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41127265484963155, "acc_stderr,none": 0.0049105884493300155, "acc_norm,none": 0.5483967337183828, "acc_norm_stderr,none": 0.004966351835028204}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.01848372282421882, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018569247333205148, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050467}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.03633254072705441}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01915219118227231, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237653}, "piqa": {"alias": "piqa", "acc,none": 0.7094668117519043, "acc_stderr,none": 0.010592765034696536, "acc_norm,none": 0.7127312295973884, "acc_norm_stderr,none": 0.010557291761528632}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5004016064257029, "acc_stderr,none": 0.010022069634353863}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4353413654618474, "acc_stderr,none": 0.009937920221480507}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512704}}
|
| 8 |
{"created_at": "2025-09-04T06:31:48.827890", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173314, "acc_norm,none": 0.34982935153583616, "acc_norm_stderr,none": 0.013936809212158284}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6771885521885522, "acc_stderr,none": 0.009593950220366744, "acc_norm,none": 0.6069023569023569, "acc_norm_stderr,none": 0.010022540618945305}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687961, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687961}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321819, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321819}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132635, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132635}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.425911173073093, "acc_stderr,none": 0.004934698012050244, "acc_norm,none": 0.5656243776140211, "acc_norm_stderr,none": 0.004946617138983514}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.01850104240213322, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.049360904959780114}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018306114689962162, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.22580645161290322, "acc_stderr,none": 0.03369244953981245}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018616234216771214, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7132752992383025, "acc_stderr,none": 0.01055131450310806, "acc_norm,none": 0.7170837867247007, "acc_norm_stderr,none": 0.010508949177489688}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5108433734939759, "acc_stderr,none": 0.01001971582448348}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44899598393574297, "acc_stderr,none": 0.00996979347724083}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287618}}
|
| 9 |
+
{"created_at": "2025-09-05T18:47:20.879377", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3319112627986348, "acc_stderr,none": 0.013760988200880543, "acc_norm,none": 0.37542662116040953, "acc_norm_stderr,none": 0.014150631435111728}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6898148148148148, "acc_stderr,none": 0.009491721291998515, "acc_norm,none": 0.6321548821548821, "acc_norm_stderr,none": 0.00989492346445518}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308214, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308214}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.21777777777777776, "acc_stderr,none": 0.013765500608039471, "acc_norm,none": 0.21777777777777776, "acc_norm_stderr,none": 0.013765500608039471}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4309898426608245, "acc_stderr,none": 0.004942026200279599, "acc_norm,none": 0.5739892451702848, "acc_norm_stderr,none": 0.004934846809827203}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018396188774261225, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152274}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018630161945937893, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.034351824402457654}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569707}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.25, "acc_stderr,none": 0.0185056211373119, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.7121871599564744, "acc_stderr,none": 0.01056325038305919, "acc_norm,none": 0.7176278563656148, "acc_norm_stderr,none": 0.010502821668555377}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5080321285140562, "acc_stderr,none": 0.010020779633955238}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4634538152610442, "acc_stderr,none": 0.009995265580368914}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.00949345492543825}}
|
| 10 |
+
{"created_at": "2025-09-05T18:56:30.712902", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3302047781569966, "acc_stderr,none": 0.013743085603760419, "acc_norm,none": 0.3643344709897611, "acc_norm_stderr,none": 0.014063260279882415}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6902356902356902, "acc_stderr,none": 0.00948817285190372, "acc_norm,none": 0.6245791245791246, "acc_norm_stderr,none": 0.0099362185271143}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458123, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458123}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025825, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025825}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945582, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945582}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792642, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792642}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43238398725353516, "acc_stderr,none": 0.004943945069611452, "acc_norm,none": 0.5790679147580163, "acc_norm_stderr,none": 0.004926996830194241}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.01843279652730255, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152274}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018960271703289854, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.034673771737174536}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.5, "acc_stderr,none": 0.15075567228888181}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668394296644946, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.7176278563656148, "acc_stderr,none": 0.010502821668555377, "acc_norm,none": 0.720892274211099, "acc_norm_stderr,none": 0.010465657948498228}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5144578313253012, "acc_stderr,none": 0.010017882185606017}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4738955823293173, "acc_stderr,none": 0.010008404651660658}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}}
|