Spaces:
Running
Running
upd
Browse files
tasks.py
CHANGED
|
@@ -94,6 +94,10 @@ class Task:
|
|
| 94 |
def samples(self):
|
| 95 |
return self.dataset[self.input_column]
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
@cached_property
|
| 98 |
def dataset(self):
|
| 99 |
ds = (
|
|
@@ -149,12 +153,10 @@ class Task:
|
|
| 149 |
)
|
| 150 |
return metric._compute
|
| 151 |
|
| 152 |
-
@
|
| 153 |
def result(self) -> dict:
|
| 154 |
assert self.outputs, "Please run the task first."
|
| 155 |
-
results = self.metric(
|
| 156 |
-
responses=self.outputs, references=self.dataset[self.label_column]
|
| 157 |
-
)
|
| 158 |
# logging.info(f"{self.name}:{results}")
|
| 159 |
return results
|
| 160 |
|
|
|
|
| 94 |
def samples(self):
|
| 95 |
return self.dataset[self.input_column]
|
| 96 |
|
| 97 |
+
@cached_property
|
| 98 |
+
def labels(self):
|
| 99 |
+
return self.dataset[self.label_column]
|
| 100 |
+
|
| 101 |
@cached_property
|
| 102 |
def dataset(self):
|
| 103 |
ds = (
|
|
|
|
| 153 |
)
|
| 154 |
return metric._compute
|
| 155 |
|
| 156 |
+
@property
|
| 157 |
def result(self) -> dict:
|
| 158 |
assert self.outputs, "Please run the task first."
|
| 159 |
+
results = self.metric(self.outputs, self.labels)
|
|
|
|
|
|
|
| 160 |
# logging.info(f"{self.name}:{results}")
|
| 161 |
return results
|
| 162 |
|
tlem.py
CHANGED
|
@@ -82,6 +82,7 @@ class Suite(EvaluationSuite):
|
|
| 82 |
"ceval-chat",
|
| 83 |
"bbh",
|
| 84 |
"drop",
|
|
|
|
| 85 |
]
|
| 86 |
|
| 87 |
def __getitem__(self, key) -> Task:
|
|
@@ -215,6 +216,7 @@ class Suite(EvaluationSuite):
|
|
| 215 |
except ValueError:
|
| 216 |
logging.debug(f"add {task.name} to suite.")
|
| 217 |
self.tasks.append(task)
|
|
|
|
| 218 |
return self.tasks[-1]
|
| 219 |
|
| 220 |
def drop_duplicates(self, suite):
|
|
|
|
| 82 |
"ceval-chat",
|
| 83 |
"bbh",
|
| 84 |
"drop",
|
| 85 |
+
"MATH",
|
| 86 |
]
|
| 87 |
|
| 88 |
def __getitem__(self, key) -> Task:
|
|
|
|
| 216 |
except ValueError:
|
| 217 |
logging.debug(f"add {task.name} to suite.")
|
| 218 |
self.tasks.append(task)
|
| 219 |
+
logging.debug(self.tasks)
|
| 220 |
return self.tasks[-1]
|
| 221 |
|
| 222 |
def drop_duplicates(self, suite):
|