Commit
·
9b382e3
1
Parent(s):
bd0b666
fix: Allow languages that do not have all tasks
Browse files
app.py
CHANGED
|
@@ -258,8 +258,18 @@ def update_model_ids_dropdown(
|
|
| 258 |
logger.info("No languages selected. Resetting model ids dropdown.")
|
| 259 |
return gr.update(choices=[], value=[])
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
filtered_results_dfs = {
|
| 262 |
-
language: df
|
| 263 |
for language, df in results_dfs.items()
|
| 264 |
if language.name in language_names
|
| 265 |
}
|
|
@@ -337,7 +347,6 @@ def produce_radial_plot(
|
|
| 337 |
f"{language_names!r}..."
|
| 338 |
)
|
| 339 |
|
| 340 |
-
tasks = ALL_TASKS
|
| 341 |
languages = [ALL_LANGUAGES[language_name] for language_name in language_names]
|
| 342 |
|
| 343 |
results_dfs_filtered = {
|
|
@@ -346,6 +355,12 @@ def produce_radial_plot(
|
|
| 346 |
if language.name in language_names
|
| 347 |
}
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
# Add all the evaluation results for each model
|
| 350 |
results: list[list[float]] = list()
|
| 351 |
for model_id in model_ids:
|
|
@@ -453,10 +468,7 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
|
|
| 453 |
lambda list_or_nan:
|
| 454 |
np.mean(list_or_nan) if list_or_nan == list_or_nan else list_or_nan
|
| 455 |
).dropna()
|
| 456 |
-
|
| 457 |
-
results_dfs[language] = pd.DataFrame()
|
| 458 |
-
else:
|
| 459 |
-
results_dfs[language] = results_df
|
| 460 |
|
| 461 |
logger.info("Successfully fetched results from ScandEval benchmark.")
|
| 462 |
|
|
|
|
| 258 |
logger.info("No languages selected. Resetting model ids dropdown.")
|
| 259 |
return gr.update(choices=[], value=[])
|
| 260 |
|
| 261 |
+
tasks = [
|
| 262 |
+
task
|
| 263 |
+
for task in ALL_TASKS
|
| 264 |
+
if all(
|
| 265 |
+
task in df.columns
|
| 266 |
+
for language, df in results_dfs.items()
|
| 267 |
+
if language.name in language_names
|
| 268 |
+
)
|
| 269 |
+
]
|
| 270 |
+
|
| 271 |
filtered_results_dfs = {
|
| 272 |
+
language: df[tasks]
|
| 273 |
for language, df in results_dfs.items()
|
| 274 |
if language.name in language_names
|
| 275 |
}
|
|
|
|
| 347 |
f"{language_names!r}..."
|
| 348 |
)
|
| 349 |
|
|
|
|
| 350 |
languages = [ALL_LANGUAGES[language_name] for language_name in language_names]
|
| 351 |
|
| 352 |
results_dfs_filtered = {
|
|
|
|
| 355 |
if language.name in language_names
|
| 356 |
}
|
| 357 |
|
| 358 |
+
tasks = [
|
| 359 |
+
task
|
| 360 |
+
for task in ALL_TASKS
|
| 361 |
+
if all(task in df.columns for df in results_dfs_filtered.values())
|
| 362 |
+
]
|
| 363 |
+
|
| 364 |
# Add all the evaluation results for each model
|
| 365 |
results: list[list[float]] = list()
|
| 366 |
for model_id in model_ids:
|
|
|
|
| 468 |
lambda list_or_nan:
|
| 469 |
np.mean(list_or_nan) if list_or_nan == list_or_nan else list_or_nan
|
| 470 |
).dropna()
|
| 471 |
+
results_dfs[language] = results_df
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
logger.info("Successfully fetched results from ScandEval benchmark.")
|
| 474 |
|