Spaces:

orbtailwaves23
/

open_dutch_llm_leaderboard

Sleeping

laiviet committed on Jun 4, 2023

Commit

a5244e0

1 Parent(s): 95b0e17

Update app.py to be compatible with the new logs

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,10 +6,10 @@ import gradio as gr
 from content import *
 import glob
-ARC = "arc_challenge"
 HELLASWAG = "hellaswag"
 MMLU = "mmlu"
-TRUTHFULQA = "truthfulqa-mc"
 BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
 METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
@@ -39,18 +39,8 @@ def collect_results():
         pretrained_models.add(pretrained)
         for lang_task, perfs in results.items():
-            if lang_task.startswith('arc_') and lang_task.endswith('_challenge'):
-                lang = lang_task.split('_')[1]
-                task = ARC
-            elif lang_task.startswith('hellaswag_'):
-                _, lang = lang_task.split('_')
-                task = HELLASWAG
-            elif lang_task.startswith('mmlu_'):
-                _, lang = lang_task.split('_')
-                task = MMLU
-            elif lang_task.startswith('truthfulqa_') and lang_task.endswith('_mc'):
-                lang = lang_task.split('_')[1]
-                task = TRUTHFULQA
             if lang and task:
                 metric = METRICS[BENCHMARKS.index(task)]

 from content import *
 import glob
+ARC = "arc"
 HELLASWAG = "hellaswag"
 MMLU = "mmlu"
+TRUTHFULQA = "truthfulqa"
 BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
 METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
         pretrained_models.add(pretrained)
         for lang_task, perfs in results.items():
+            task, lang = lang_task.split('_')
+            assert task in BENCHMARKS
             if lang and task:
                 metric = METRICS[BENCHMARKS.index(task)]