Spaces:
Runtime error
Runtime error
Commit ·
8e3d8c1
1
Parent(s): 3be882c
update
Browse files
- src/backend/run_eval_suite.py +2 -0
- src/backend/tasks/faithdial/faithdial.yaml +0 -1
- src/backend/tasks/faithdial/faithdial_v2.yaml +0 -1
- src/backend/tasks/fever/fever10.yaml +0 -1
- src/backend/tasks/fever/fever11.yaml +0 -1
- src/backend/tasks/halueval/halueval_dialogue.yaml +0 -2
- src/backend/tasks/halueval/halueval_qa.yaml +0 -2
- src/backend/tasks/halueval/halueval_summarization.yaml +0 -2
- src/backend/tasks/truefalse/truefalse.yaml +0 -1
- src/backend/tasks/xsum/xsum.yaml +0 -2
src/backend/run_eval_suite.py
CHANGED
|
@@ -19,6 +19,8 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
| 19 |
# include_task_folder("src/backend/tasks/")
|
| 20 |
# initialize_tasks('INFO')
|
| 21 |
|
|
|
|
|
|
|
| 22 |
task_manager = TaskManager(include_path="./src/backend/tasks/")
|
| 23 |
# task_manager.initialize_tasks('INFO')
|
| 24 |
|
|
|
|
| 19 |
# include_task_folder("src/backend/tasks/")
|
| 20 |
# initialize_tasks('INFO')
|
| 21 |
|
| 22 |
+
print(f"Allocating task manager for: {task_names}")
|
| 23 |
+
|
| 24 |
task_manager = TaskManager(include_path="./src/backend/tasks/")
|
| 25 |
# task_manager.initialize_tasks('INFO')
|
| 26 |
|
src/backend/tasks/faithdial/faithdial.yaml
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
group: faithdial
|
| 2 |
task: faithdial_hallu
|
| 3 |
dataset_path: McGill-NLP/FaithDial
|
| 4 |
training_split: train
|
|
|
|
|
|
|
| 1 |
task: faithdial_hallu
|
| 2 |
dataset_path: McGill-NLP/FaithDial
|
| 3 |
training_split: train
|
src/backend/tasks/faithdial/faithdial_v2.yaml
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
group: faithdial
|
| 2 |
task: faithdial_hallu_v2
|
| 3 |
dataset_path: McGill-NLP/FaithDial
|
| 4 |
training_split: train
|
|
|
|
|
|
|
| 1 |
task: faithdial_hallu_v2
|
| 2 |
dataset_path: McGill-NLP/FaithDial
|
| 3 |
training_split: train
|
src/backend/tasks/fever/fever10.yaml
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
group: fever
|
| 2 |
task: fever10
|
| 3 |
dataset_path: fever
|
| 4 |
dataset_name: v1.0
|
|
|
|
|
|
|
| 1 |
task: fever10
|
| 2 |
dataset_path: fever
|
| 3 |
dataset_name: v1.0
|
src/backend/tasks/fever/fever11.yaml
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
group: fever
|
| 2 |
task: fever11
|
| 3 |
dataset_path: pminervini/hl-fever
|
| 4 |
dataset_name: v1.0
|
|
|
|
|
|
|
| 1 |
task: fever11
|
| 2 |
dataset_path: pminervini/hl-fever
|
| 3 |
dataset_name: v1.0
|
src/backend/tasks/halueval/halueval_dialogue.yaml
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
group:
|
| 2 |
-
- halueval
|
| 3 |
task: halueval_dialogue
|
| 4 |
dataset_path: pminervini/HaluEval
|
| 5 |
dataset_name: dialogue_samples
|
|
|
|
|
|
|
|
|
|
| 1 |
task: halueval_dialogue
|
| 2 |
dataset_path: pminervini/HaluEval
|
| 3 |
dataset_name: dialogue_samples
|
src/backend/tasks/halueval/halueval_qa.yaml
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
group:
|
| 2 |
-
- halueval
|
| 3 |
task: halueval_qa
|
| 4 |
dataset_path: pminervini/HaluEval
|
| 5 |
dataset_name: qa_samples
|
|
|
|
|
|
|
|
|
|
| 1 |
task: halueval_qa
|
| 2 |
dataset_path: pminervini/HaluEval
|
| 3 |
dataset_name: qa_samples
|
src/backend/tasks/halueval/halueval_summarization.yaml
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
group:
|
| 2 |
-
- halueval
|
| 3 |
task: halueval_summarization
|
| 4 |
dataset_path: pminervini/HaluEval
|
| 5 |
dataset_name: summarization_samples
|
|
|
|
|
|
|
|
|
|
| 1 |
task: halueval_summarization
|
| 2 |
dataset_path: pminervini/HaluEval
|
| 3 |
dataset_name: summarization_samples
|
src/backend/tasks/truefalse/truefalse.yaml
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
group: truefalse
|
| 2 |
task: truefalse_cieacf
|
| 3 |
dataset_path: pminervini/true-false
|
| 4 |
dataset_name: default
|
|
|
|
|
|
|
| 1 |
task: truefalse_cieacf
|
| 2 |
dataset_path: pminervini/true-false
|
| 3 |
dataset_name: default
|
src/backend/tasks/xsum/xsum.yaml
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
task:
|
| 2 |
-
- xsum
|
|
|
|
|
|
|
|
|