VLLMs-Leaderboard

Runtime error

hieunguyen1053 committed on Nov 20, 2023

Commit

0cf0987

1 Parent(s): f8b127b

Update src/tasks.py

Files changed (1) hide show

src/tasks.py CHANGED Viewed

@@ -12,7 +12,7 @@ class Task:
 class Lambada(Task):
     code = "lambada_vi"
-    name = "LAMBADA"
     metric = "ppl"
     higher_is_better = False
     num_fewshot = 0
@@ -21,7 +21,7 @@ class Lambada(Task):
 class Arc(Task):
     code = "arc_vi"
-    name = "ARC"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 25
@@ -30,7 +30,7 @@ class Arc(Task):
 class HellaSwag(Task):
     code = "hellaswag_vi"
-    name = "HellaSwag"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 10
@@ -39,7 +39,7 @@ class HellaSwag(Task):
 class MMLU(Task):
     code = "mmlu_vi"
-    name = "MMLU"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 5
@@ -48,7 +48,7 @@ class MMLU(Task):
 class TruthfulQA(Task):
     code = "truthfulqa_vi"
-    name = "TruthfulQA"
     metric = "mc2"
     higher_is_better = True
     num_fewshot = 0
@@ -75,7 +75,7 @@ class IWSLT2023_en_vi(Task):
 class WikipediaQA(Task):
     code = "wikipediaqa_vi"
-    name = "Wikipedia QA"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 5
@@ -84,11 +84,11 @@ class WikipediaQA(Task):
 class Comprehension(Task):
     code = "comprehension_vi"
-    name = "Comprehension"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 0
     private_test: bool = True
-TASKS = [Arc, HellaSwag, MMLU, TruthfulQA, Grade12Exams] + [Lambada, WikipediaQA, Comprehension]

 class Lambada(Task):
     code = "lambada_vi"
+    name = "LAMBADA-vi"
     metric = "ppl"
     higher_is_better = False
     num_fewshot = 0
 class Arc(Task):
     code = "arc_vi"
+    name = "ARC-vi"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 25
 class HellaSwag(Task):
     code = "hellaswag_vi"
+    name = "HellaSwag-vi"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 10
 class MMLU(Task):
     code = "mmlu_vi"
+    name = "MMLU-vi"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 5
 class TruthfulQA(Task):
     code = "truthfulqa_vi"
+    name = "TruthfulQA-vi"
     metric = "mc2"
     higher_is_better = True
     num_fewshot = 0
 class WikipediaQA(Task):
     code = "wikipediaqa_vi"
+    name = "GeneralKnowledgeQA-vi"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 5
 class Comprehension(Task):
     code = "comprehension_vi"
+    name = "ComprehensionQA-vi"
     metric = "acc_norm"
     higher_is_better = True
     num_fewshot = 0
     private_test: bool = True
+TASKS = [Arc, HellaSwag, MMLU, TruthfulQA] + [Lambada, WikipediaQA, Comprehension]