Spaces:
Running
Running
fix load
Browse files- .gitignore +2 -0
- tlem.py +20 -19
.gitignore
CHANGED
|
@@ -2,3 +2,5 @@ __pycache__
|
|
| 2 |
*.ju.py
|
| 3 |
tests
|
| 4 |
|
|
|
|
|
|
|
|
|
| 2 |
*.ju.py
|
| 3 |
tests
|
| 4 |
|
| 5 |
+
README_files
|
| 6 |
+
.ipynb_checkpoints
|
tlem.py
CHANGED
|
@@ -126,6 +126,7 @@ class Suite(EvaluationSuite):
|
|
| 126 |
|
| 127 |
def get_suite(self, name) -> dict[str, Task]:
|
| 128 |
chat = False
|
|
|
|
| 129 |
match name:
|
| 130 |
case _ if "chat" in name:
|
| 131 |
chat = True
|
|
@@ -171,7 +172,6 @@ class Suite(EvaluationSuite):
|
|
| 171 |
)
|
| 172 |
|
| 173 |
case "open-leaderboard":
|
| 174 |
-
suite = {}
|
| 175 |
for name in [
|
| 176 |
"arc",
|
| 177 |
"hellaswag",
|
|
@@ -181,23 +181,24 @@ class Suite(EvaluationSuite):
|
|
| 181 |
# "truthful_qa",
|
| 182 |
"drop",
|
| 183 |
]:
|
| 184 |
-
suite
|
| 185 |
case "tlem":
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
case "all":
|
| 200 |
-
|
|
|
|
| 201 |
case _:
|
| 202 |
raise NotImplementedError(
|
| 203 |
f"{name} is not supported in {self.supported_datasets}"
|
|
@@ -205,8 +206,7 @@ class Suite(EvaluationSuite):
|
|
| 205 |
|
| 206 |
if isinstance(suite, Task):
|
| 207 |
suite = [suite]
|
| 208 |
-
|
| 209 |
-
suite = {name: suite}
|
| 210 |
|
| 211 |
return suite
|
| 212 |
|
|
@@ -231,7 +231,8 @@ class Suite(EvaluationSuite):
|
|
| 231 |
return suite
|
| 232 |
|
| 233 |
def load(self, name):
|
| 234 |
-
self.
|
|
|
|
| 235 |
self.suite = self.drop_duplicates(self.suite)
|
| 236 |
# return self
|
| 237 |
|
|
|
|
| 126 |
|
| 127 |
def get_suite(self, name) -> dict[str, Task]:
|
| 128 |
chat = False
|
| 129 |
+
suite={}
|
| 130 |
match name:
|
| 131 |
case _ if "chat" in name:
|
| 132 |
chat = True
|
|
|
|
| 172 |
)
|
| 173 |
|
| 174 |
case "open-leaderboard":
|
|
|
|
| 175 |
for name in [
|
| 176 |
"arc",
|
| 177 |
"hellaswag",
|
|
|
|
| 181 |
# "truthful_qa",
|
| 182 |
"drop",
|
| 183 |
]:
|
| 184 |
+
suite.update(self.get_suite(name))
|
| 185 |
case "tlem":
|
| 186 |
+
for name in [
|
| 187 |
+
"arc",
|
| 188 |
+
"hellaswag",
|
| 189 |
+
"mmlu-chat",
|
| 190 |
+
"winogrande",
|
| 191 |
+
"gsm8k",
|
| 192 |
+
# "truthful_qa",
|
| 193 |
+
"cmmlu-chat",
|
| 194 |
+
"ceval-chat",
|
| 195 |
+
"bbh",
|
| 196 |
+
]:
|
| 197 |
+
suite.update(self.get_suite(name))
|
| 198 |
+
|
| 199 |
case "all":
|
| 200 |
+
for name in self.supported_datasets:
|
| 201 |
+
suite.update(self.get_suite(name))
|
| 202 |
case _:
|
| 203 |
raise NotImplementedError(
|
| 204 |
f"{name} is not supported in {self.supported_datasets}"
|
|
|
|
| 206 |
|
| 207 |
if isinstance(suite, Task):
|
| 208 |
suite = [suite]
|
| 209 |
+
suite = {name: suite}
|
|
|
|
| 210 |
|
| 211 |
return suite
|
| 212 |
|
|
|
|
| 231 |
return suite
|
| 232 |
|
| 233 |
def load(self, name):
|
| 234 |
+
sub_suite = self.get_suite(name)
|
| 235 |
+
self.suite.update(sub_suite)
|
| 236 |
self.suite = self.drop_duplicates(self.suite)
|
| 237 |
# return self
|
| 238 |
|