Spaces:
Runtime error
Runtime error
Kyuho Heo commited on
Commit ยท
e74285c
1
Parent(s): 2b726be
spacerank
Browse files- .gitattributes +1 -1
- README.md +6 -41
- app.py +48 -110
- constants.py +36 -0
- handlers.py +86 -0
- src/about.py +3 -3
- src/config.py +22 -22
- src/data/export_category_250618.csv +0 -3
- src/data/export_category_250709.csv +0 -0
- src/data/export_lang_250618.csv +0 -3
- src/data/export_lang_250709.csv +0 -0
- src/data_utils.py +2 -2
- src/display/css_html_js.py +281 -1
- src/display/formatting.py +125 -0
- src/display/utils.py +16 -15
- src/envs.py +1 -1
- src/submission/check_validity.py +1 -1
- src/submission/submit.py +18 -7
- ui.py +228 -0
- utils.py +15 -1
.gitattributes
CHANGED
|
@@ -25,6 +25,7 @@
|
|
| 25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 28 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 29 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 30 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
|
@@ -32,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
README.md
CHANGED
|
@@ -1,49 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: green
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
|
|
|
| 7 |
app_file: app.py
|
| 8 |
-
pinned:
|
| 9 |
license: apache-2.0
|
| 10 |
-
short_description: Leaderboards for evaluating LLMs
|
| 11 |
-
sdk_version: 5.19.0
|
| 12 |
hf_oauth: true
|
| 13 |
-
hf_oauth_scopes:
|
| 14 |
-
- read-repos
|
| 15 |
---
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
|
| 20 |
-
|
| 21 |
-
Results files should have the following format and be stored as json files:
|
| 22 |
-
```json
|
| 23 |
-
{
|
| 24 |
-
"config": {
|
| 25 |
-
"model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
|
| 26 |
-
"model_name": "path of the model on the hub: org/model",
|
| 27 |
-
"model_sha": "revision on the hub",
|
| 28 |
-
},
|
| 29 |
-
"results": {
|
| 30 |
-
"task_name": {
|
| 31 |
-
"metric_name": score,
|
| 32 |
-
},
|
| 33 |
-
"task_name2": {
|
| 34 |
-
"metric_name": score,
|
| 35 |
-
}
|
| 36 |
-
}
|
| 37 |
-
}
|
| 38 |
-
```
|
| 39 |
-
|
| 40 |
-
Request files are created automatically by this tool.
|
| 41 |
-
|
| 42 |
-
If you encounter problem on the space, don't hesitate to restart it to remove the create eval-queue, eval-queue-bk, eval-results and eval-results-bk created folder.
|
| 43 |
-
|
| 44 |
-
# Code logic for more complex edits
|
| 45 |
-
|
| 46 |
-
You'll find
|
| 47 |
-
- the main table' columns names and properties in `src/display/utils.py`
|
| 48 |
-
- the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
|
| 49 |
-
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
|
|
|
| 1 |
---
|
| 2 |
+
title: test_space
|
| 3 |
+
emoji: ๐
|
| 4 |
colorFrom: green
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.38.0
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
license: apache-2.0
|
|
|
|
|
|
|
| 11 |
hf_oauth: true
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -6,8 +6,8 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
| 6 |
from huggingface_hub import snapshot_download
|
| 7 |
from src.data_utils import get_dataframe_category, get_dataframe_language
|
| 8 |
import src.config as configs
|
| 9 |
-
from utils import get_profile, get_organizations, get_profile_and_organizations
|
| 10 |
-
|
| 11 |
|
| 12 |
from src.about import (
|
| 13 |
CITATION_BUTTON_LABEL,
|
|
@@ -36,24 +36,36 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
|
| 36 |
from src.submission.submit import add_new_eval_option1, add_new_eval_option2
|
| 37 |
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def restart_space():
|
| 40 |
API.restart_space(repo_id=REPO_ID)
|
| 41 |
|
| 42 |
### Space initialisation
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
restart_space
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
snapshot_download
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
(
|
| 59 |
finished_eval_queue_df,
|
|
@@ -61,9 +73,6 @@ except Exception:
|
|
| 61 |
pending_eval_queue_df,
|
| 62 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 63 |
|
| 64 |
-
tab_keys = ["Category", "Language"]
|
| 65 |
-
|
| 66 |
-
|
| 67 |
demo = gr.Blocks(css=custom_css)
|
| 68 |
with demo:
|
| 69 |
gr.HTML(TITLE)
|
|
@@ -71,93 +80,21 @@ with demo:
|
|
| 71 |
user_state = gr.State()
|
| 72 |
organization_state = gr.State()
|
| 73 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 74 |
-
|
| 75 |
-
def search_leaderboard(query, df):
|
| 76 |
-
if not query.strip():
|
| 77 |
-
return df
|
| 78 |
-
filtered = df[df.apply(lambda row: row.astype(str).str.contains(query, case=False).any(), axis=1)]
|
| 79 |
-
return filtered
|
| 80 |
-
|
| 81 |
-
def update_modelselector_group(groups, df):
|
| 82 |
-
"""
|
| 83 |
-
groups (gr.CheckboxGroup): List of currently selected models
|
| 84 |
-
df (DataFrame or gr.State): Current dataframe
|
| 85 |
-
"""
|
| 86 |
-
print("groups:", groups)
|
| 87 |
-
if not groups:
|
| 88 |
-
return None
|
| 89 |
-
|
| 90 |
-
filtered_df = df[df["Group"].isin(groups)]
|
| 91 |
-
models = filtered_df["Model Name"].unique().tolist()
|
| 92 |
-
|
| 93 |
-
return models
|
| 94 |
-
|
| 95 |
-
def update_columnselector_group(columns, groups, df):
|
| 96 |
-
print("column groups:", groups)
|
| 97 |
-
|
| 98 |
-
columns = [c for c in columns if c in df.columns[:3]]
|
| 99 |
-
|
| 100 |
-
columns.extend(df.columns[3:])
|
| 101 |
-
|
| 102 |
-
print(columns)
|
| 103 |
-
|
| 104 |
-
return columns
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def update_leaderboard(models, columns, df):
|
| 108 |
-
print("models:", models)
|
| 109 |
-
print("columns:", columns)
|
| 110 |
-
|
| 111 |
-
filtered_df = df[df["Model Name"].isin(models)]
|
| 112 |
-
filtered_columns = [c for c in df.columns if c in columns or c in ["Model Name"]]
|
| 113 |
-
filtered_df = filtered_df[filtered_columns]
|
| 114 |
-
|
| 115 |
-
for col in filtered_df.select_dtypes(include="number").columns:
|
| 116 |
-
filtered_df[col] = filtered_df[col].round(3)
|
| 117 |
-
|
| 118 |
-
return filtered_df
|
| 119 |
-
|
| 120 |
-
def get_models_by_group(df, groups):
|
| 121 |
-
return df[df["Group"].isin(groups)]["Model Name"].tolist()
|
| 122 |
-
|
| 123 |
-
for _, key in enumerate(tab_keys):
|
| 124 |
if key == "Category":
|
| 125 |
-
|
|
|
|
| 126 |
else:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
search_box = gr.Textbox(label="Search Model by Name")
|
| 138 |
-
group_list = df["Group"].unique().tolist()
|
| 139 |
-
group_selector = gr.CheckboxGroup(choices=df["Group"].unique().tolist(), value=group_list, label="Select Model Group")
|
| 140 |
-
|
| 141 |
-
if key == "Category":
|
| 142 |
-
column_selector = gr.CheckboxGroup(choices=df.columns.tolist()[3:], value=configs.ON_LOAD_COLUMNS_CATEGORY[3:], label="Select Columns")
|
| 143 |
-
else:
|
| 144 |
-
column_selector = gr.CheckboxGroup(choices=df.columns.tolist()[3:], value=configs.ON_LOAD_COLUMNS_LANG[3:], label="Select Columns")
|
| 145 |
-
|
| 146 |
-
with gr.Column():
|
| 147 |
-
with gr.Accordion("Model List", open=False):
|
| 148 |
-
model_group = df["Model Name"].tolist()
|
| 149 |
-
model_selector = gr.CheckboxGroup(choices=df["Model Name"].tolist(), value=model_group, label="Select Models")
|
| 150 |
-
|
| 151 |
-
ld = gr.DataFrame(
|
| 152 |
-
value=df.round(3)
|
| 153 |
-
)
|
| 154 |
-
|
| 155 |
-
# Define change functions for user interaction
|
| 156 |
-
search_box.change(fn=search_leaderboard, inputs=[search_box, df_state], outputs=ld)
|
| 157 |
-
group_selector.change(fn=update_modelselector_group, inputs=[group_selector, df_state], outputs=model_selector)
|
| 158 |
-
model_selector.change(fn=update_leaderboard, inputs=[model_selector, column_selector, df_state], outputs=ld)
|
| 159 |
-
column_selector.change(fn=update_leaderboard, inputs=[model_selector, column_selector, df_state], outputs=ld)
|
| 160 |
-
|
| 161 |
with gr.TabItem("๐ About", elem_id="llm-benchmark-tab-table", id=2):
|
| 162 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 163 |
|
|
@@ -175,10 +112,10 @@ with demo:
|
|
| 175 |
with gr.Row():
|
| 176 |
with gr.Column():
|
| 177 |
benchmark_type = gr.Dropdown(
|
| 178 |
-
choices=["
|
| 179 |
label="The name of the benchmark to be evaluated",
|
| 180 |
multiselect=False,
|
| 181 |
-
value="
|
| 182 |
interactive=True,
|
| 183 |
)
|
| 184 |
model_name_textbox = gr.Textbox(label="Model name")
|
|
@@ -192,7 +129,7 @@ with demo:
|
|
| 192 |
)
|
| 193 |
base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
| 194 |
vllm_version_type = gr.Dropdown(
|
| 195 |
-
choices=
|
| 196 |
label="vLLM version",
|
| 197 |
multiselect=False,
|
| 198 |
value="v0.9.2",
|
|
@@ -239,10 +176,10 @@ with demo:
|
|
| 239 |
with gr.Row():
|
| 240 |
with gr.Column():
|
| 241 |
benchmark_type2 = gr.Dropdown(
|
| 242 |
-
choices=["
|
| 243 |
label="The name of the benchmark to be evaluated",
|
| 244 |
multiselect=False,
|
| 245 |
-
value="
|
| 246 |
interactive=True,
|
| 247 |
)
|
| 248 |
model_name_textbox2 = gr.Textbox(label="Model name")
|
|
@@ -313,7 +250,8 @@ with demo:
|
|
| 313 |
show_copy_button=True,
|
| 314 |
)
|
| 315 |
|
|
|
|
| 316 |
scheduler = BackgroundScheduler()
|
| 317 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
| 318 |
scheduler.start()
|
| 319 |
-
demo.queue(default_concurrency_limit=40).launch()
|
|
|
|
| 6 |
from huggingface_hub import snapshot_download
|
| 7 |
from src.data_utils import get_dataframe_category, get_dataframe_language
|
| 8 |
import src.config as configs
|
| 9 |
+
from utils import get_profile, get_organizations, get_profile_and_organizations, download_with_restart
|
| 10 |
+
|
| 11 |
|
| 12 |
from src.about import (
|
| 13 |
CITATION_BUTTON_LABEL,
|
|
|
|
| 36 |
from src.submission.submit import add_new_eval_option1, add_new_eval_option2
|
| 37 |
|
| 38 |
|
| 39 |
+
from handlers import (
|
| 40 |
+
search_leaderboard,
|
| 41 |
+
update_modelselector_group,
|
| 42 |
+
update_columnselector_group,
|
| 43 |
+
update_leaderboard,
|
| 44 |
+
get_models_by_group,
|
| 45 |
+
)
|
| 46 |
+
from ui import create_leaderboard_tab
|
| 47 |
+
from constants import TAB_KEYS, TAB_NAMES, VLLM_VERSIONS
|
| 48 |
+
|
| 49 |
def restart_space():
|
| 50 |
API.restart_space(repo_id=REPO_ID)
|
| 51 |
|
| 52 |
### Space initialisation
|
| 53 |
+
download_with_restart(
|
| 54 |
+
snapshot_download,
|
| 55 |
+
repo_id=QUEUE_REPO,
|
| 56 |
+
local_dir=EVAL_REQUESTS_PATH,
|
| 57 |
+
repo_type="dataset",
|
| 58 |
+
token=TOKEN,
|
| 59 |
+
restart_func=restart_space
|
| 60 |
+
)
|
| 61 |
+
download_with_restart(
|
| 62 |
+
snapshot_download,
|
| 63 |
+
repo_id=RESULTS_REPO,
|
| 64 |
+
local_dir=EVAL_RESULTS_PATH,
|
| 65 |
+
repo_type="dataset",
|
| 66 |
+
token=TOKEN,
|
| 67 |
+
restart_func=restart_space
|
| 68 |
+
)
|
| 69 |
|
| 70 |
(
|
| 71 |
finished_eval_queue_df,
|
|
|
|
| 73 |
pending_eval_queue_df,
|
| 74 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
demo = gr.Blocks(css=custom_css)
|
| 77 |
with demo:
|
| 78 |
gr.HTML(TITLE)
|
|
|
|
| 80 |
user_state = gr.State()
|
| 81 |
organization_state = gr.State()
|
| 82 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 83 |
+
for _, key in enumerate(TAB_KEYS):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
if key == "Category":
|
| 85 |
+
df = get_dataframe_category()
|
| 86 |
+
column_selector_value = configs.ON_LOAD_COLUMNS_CATEGORY[3:]
|
| 87 |
else:
|
| 88 |
+
df = get_dataframe_language()
|
| 89 |
+
column_selector_value = configs.ON_LOAD_COLUMNS_LANG[3:]
|
| 90 |
+
create_leaderboard_tab(
|
| 91 |
+
df,
|
| 92 |
+
key,
|
| 93 |
+
search_leaderboard,
|
| 94 |
+
update_modelselector_group,
|
| 95 |
+
update_leaderboard,
|
| 96 |
+
column_selector_value
|
| 97 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
with gr.TabItem("๐ About", elem_id="llm-benchmark-tab-table", id=2):
|
| 99 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 100 |
|
|
|
|
| 112 |
with gr.Row():
|
| 113 |
with gr.Column():
|
| 114 |
benchmark_type = gr.Dropdown(
|
| 115 |
+
choices=["TRUEBench v0.1"],
|
| 116 |
label="The name of the benchmark to be evaluated",
|
| 117 |
multiselect=False,
|
| 118 |
+
value="TRUEBench v0.1",
|
| 119 |
interactive=True,
|
| 120 |
)
|
| 121 |
model_name_textbox = gr.Textbox(label="Model name")
|
|
|
|
| 129 |
)
|
| 130 |
base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
| 131 |
vllm_version_type = gr.Dropdown(
|
| 132 |
+
choices=VLLM_VERSIONS,
|
| 133 |
label="vLLM version",
|
| 134 |
multiselect=False,
|
| 135 |
value="v0.9.2",
|
|
|
|
| 176 |
with gr.Row():
|
| 177 |
with gr.Column():
|
| 178 |
benchmark_type2 = gr.Dropdown(
|
| 179 |
+
choices=["TRUEBench v0.1"],
|
| 180 |
label="The name of the benchmark to be evaluated",
|
| 181 |
multiselect=False,
|
| 182 |
+
value="TRUEBench v0.1",
|
| 183 |
interactive=True,
|
| 184 |
)
|
| 185 |
model_name_textbox2 = gr.Textbox(label="Model name")
|
|
|
|
| 250 |
show_copy_button=True,
|
| 251 |
)
|
| 252 |
|
| 253 |
+
|
| 254 |
scheduler = BackgroundScheduler()
|
| 255 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
| 256 |
scheduler.start()
|
| 257 |
+
demo.queue(default_concurrency_limit=40).launch()
|
constants.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# constants.py
|
| 2 |
+
|
| 3 |
+
TAB_KEYS = ["Category", "Language"]
|
| 4 |
+
|
| 5 |
+
TAB_NAMES = {
|
| 6 |
+
"Category": "TRUEBench v0.1 (Category ๐ง)",
|
| 7 |
+
"Language": "TRUEBench v0.1 (Language ๐)"
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
VLLM_VERSIONS = [
|
| 11 |
+
"v0.9.2", "v0.9.2rc2", "v0.9.2rc1", "v0.9.1", "v0.9.1rc2", "v0.9.1rc1",
|
| 12 |
+
"v0.9.0.1", "v0.9.0", "v0.8.5", "v0.8.5.post1", "v0.8.4", "v0.8.3",
|
| 13 |
+
"v0.8.3rc1", "v0.8.2", "v0.8.1", "v0.8.0", "v0.8.0rc2", "v0.8.0rc1",
|
| 14 |
+
"v0.7.3", "v0.7.2", "v0.7.1", "v0.6.6", "v0.6.6.post1", "v0.6.5",
|
| 15 |
+
"v0.6.4.post1", "v0.6.4", "v0.6.3.post1", "v0.6.2", "v0.6.1",
|
| 16 |
+
"v0.6.1.post2", "v0.6.1.post1", "v0.6.0"
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
# ๋ฆฌ๋๋ณด๋ ํ์ ์ปฌ๋ผ(ํญ์ ํฌํจ๋์ด์ผ ํจ)
|
| 20 |
+
LEADERBOARD_REQUIRED_COLUMNS = [
|
| 21 |
+
"Model Name", "Group", "Overall", "Model Type", "Output Form", "Rank"
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# Model badge mappings (centralized for both UI and backend)
|
| 25 |
+
MODEL_TYPE_MAP = {
|
| 26 |
+
"deepseek_r1": "open",
|
| 27 |
+
"deepseek_r1_0528": "open",
|
| 28 |
+
"Qwen3-32B": "open",
|
| 29 |
+
"Gauss2.3-Think-250708": "closed"
|
| 30 |
+
}
|
| 31 |
+
OUTPUT_FORM_MAP = {
|
| 32 |
+
"deepseek_r1": "reasoning",
|
| 33 |
+
"deepseek_r1_0528": "normal",
|
| 34 |
+
"Qwen3-32B": "reasoning",
|
| 35 |
+
"Gauss2.3-Think-250708": "reasoning"
|
| 36 |
+
}
|
handlers.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def search_leaderboard(query, df, sort_col=None, sort_asc=True):
|
| 4 |
+
if not query.strip():
|
| 5 |
+
filtered = df
|
| 6 |
+
else:
|
| 7 |
+
filtered = df[df.apply(lambda row: row.astype(str).str.contains(query, case=False).any(), axis=1)]
|
| 8 |
+
if sort_col and sort_col in filtered.columns:
|
| 9 |
+
filtered = filtered.sort_values(sort_col, ascending=sort_asc).reset_index(drop=True)
|
| 10 |
+
return filtered
|
| 11 |
+
|
| 12 |
+
def update_modelselector_group(groups, df):
|
| 13 |
+
"""
|
| 14 |
+
groups (gr.CheckboxGroup): List of currently selected models
|
| 15 |
+
df (DataFrame or gr.State): Current dataframe
|
| 16 |
+
"""
|
| 17 |
+
print("groups:", groups)
|
| 18 |
+
if not groups:
|
| 19 |
+
return None
|
| 20 |
+
|
| 21 |
+
filtered_df = df[df["Group"].isin(groups)]
|
| 22 |
+
models = filtered_df["Model Name"].unique().tolist()
|
| 23 |
+
|
| 24 |
+
return models
|
| 25 |
+
|
| 26 |
+
def update_columnselector_group(columns, groups, df):
|
| 27 |
+
print("column groups:", groups)
|
| 28 |
+
|
| 29 |
+
columns = [c for c in columns if c in df.columns[:3]]
|
| 30 |
+
|
| 31 |
+
columns.extend(df.columns[3:])
|
| 32 |
+
|
| 33 |
+
print(columns)
|
| 34 |
+
|
| 35 |
+
return columns
|
| 36 |
+
|
| 37 |
+
from constants import LEADERBOARD_REQUIRED_COLUMNS, MODEL_TYPE_MAP, OUTPUT_FORM_MAP
|
| 38 |
+
|
| 39 |
+
def update_leaderboard(models, columns, df, sort_col=None, sort_asc=True):
|
| 40 |
+
print("models:", models)
|
| 41 |
+
print("columns:", columns)
|
| 42 |
+
print("sort_col:", sort_col, "sort_asc:", sort_asc)
|
| 43 |
+
|
| 44 |
+
# ํ์ ์ปฌ๋ผ ํญ์ ํฌํจ
|
| 45 |
+
columns = list(dict.fromkeys(LEADERBOARD_REQUIRED_COLUMNS + list(columns)))
|
| 46 |
+
|
| 47 |
+
# ๋ฑ์ง/๋ญํฌ ๋ ๋๋ง์ ํ์ํ ์ปฌ๋ผ ํญ์ ํฌํจ
|
| 48 |
+
always_include = ["Model Name", "Model Type", "Output Form", "Rank"]
|
| 49 |
+
filtered_df = df[df["Model Name"].isin(models)].copy()
|
| 50 |
+
|
| 51 |
+
# Model Type, Output Form, Rank ์ปฌ๋ผ์ด ์์ผ๋ฉด ์์ฑ
|
| 52 |
+
if "Model Type" not in filtered_df.columns:
|
| 53 |
+
filtered_df["Model Type"] = filtered_df["Model Name"].map(MODEL_TYPE_MAP).fillna("open")
|
| 54 |
+
if "Output Form" not in filtered_df.columns:
|
| 55 |
+
filtered_df["Output Form"] = filtered_df["Model Name"].map(OUTPUT_FORM_MAP).fillna("normal")
|
| 56 |
+
if "Rank" not in filtered_df.columns:
|
| 57 |
+
# ์ ๋ ฌ ๊ธฐ์ค: sort_col์ด ์์ผ๋ฉด ํด๋น ์ปฌ๋ผ, ์์ผ๋ฉด Overall
|
| 58 |
+
rank_col = sort_col if sort_col and sort_col in filtered_df.columns else ("Overall" if "Overall" in filtered_df.columns else None)
|
| 59 |
+
if rank_col:
|
| 60 |
+
filtered_df = filtered_df.sort_values(rank_col, ascending=not sort_asc).reset_index(drop=True)
|
| 61 |
+
filtered_df["Rank"] = filtered_df.index + 1
|
| 62 |
+
else:
|
| 63 |
+
filtered_df["Rank"] = range(1, len(filtered_df) + 1)
|
| 64 |
+
|
| 65 |
+
# always_include ์ปฌ๋ผ์ ๋ฌด์กฐ๊ฑด ํฌํจ
|
| 66 |
+
filtered_columns = [c for c in df.columns if c in columns or c in always_include]
|
| 67 |
+
for col in always_include:
|
| 68 |
+
if col not in filtered_columns:
|
| 69 |
+
filtered_columns.append(col)
|
| 70 |
+
|
| 71 |
+
# ์ค๋ณต ์ ๊ฑฐ ๋ฐ ์์ ๋ณด์ฅ
|
| 72 |
+
filtered_columns = list(dict.fromkeys(filtered_columns))
|
| 73 |
+
filtered_df = filtered_df[filtered_columns]
|
| 74 |
+
|
| 75 |
+
for col in filtered_df.select_dtypes(include="number").columns:
|
| 76 |
+
filtered_df[col] = filtered_df[col].round(3)
|
| 77 |
+
|
| 78 |
+
if sort_col and sort_col in filtered_df.columns:
|
| 79 |
+
filtered_df = filtered_df.sort_values(sort_col, ascending=sort_asc).reset_index(drop=True)
|
| 80 |
+
# Rank ์ฌ๊ณ์ฐ
|
| 81 |
+
filtered_df["Rank"] = filtered_df.index + 1
|
| 82 |
+
|
| 83 |
+
return filtered_df
|
| 84 |
+
|
| 85 |
+
def get_models_by_group(df, groups):
|
| 86 |
+
return df[df["Group"].isin(groups)]["Model Name"].tolist()
|
src/about.py
CHANGED
|
@@ -21,13 +21,13 @@ NUM_FEWSHOT = 0 # Change with your few shot
|
|
| 21 |
|
| 22 |
|
| 23 |
# Your leaderboard name
|
| 24 |
-
TITLE = """<h1 align="center" id="space-title">๐ฅ
|
| 25 |
|
| 26 |
# What does your leaderboard evaluate?
|
| 27 |
INTRODUCTION_TEXT = """
|
| 28 |
Leaderboards for LLM evaluation.
|
| 29 |
|
| 30 |
-
|
| 31 |
"""
|
| 32 |
|
| 33 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
@@ -87,7 +87,7 @@ EVALUATION_QUEUE_TEXT_OPTION2 = """
|
|
| 87 |
|
| 88 |
EVALUATION_QUEUE_TEXT_OPTION3 = """
|
| 89 |
# (Option 3) Pull Request
|
| 90 |
-
If Option 1 & 2 is unavailable, make [PR](https://huggingface.co/spaces/
|
| 91 |
|
| 92 |
```
|
| 93 |
### Open-weight models:
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
# Your leaderboard name
|
| 24 |
+
TITLE = """<h1 align="center" id="space-title">๐ฅ Test Space</h1>"""
|
| 25 |
|
| 26 |
# What does your leaderboard evaluate?
|
| 27 |
INTRODUCTION_TEXT = """
|
| 28 |
Leaderboards for LLM evaluation.
|
| 29 |
|
| 30 |
+
*TRUE(Trustworthy Real-world Usage Evaluation)Bench* is designed to evaluate LLMs for Productivity Assistants which stand for human's job productivity.
|
| 31 |
"""
|
| 32 |
|
| 33 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
|
|
| 87 |
|
| 88 |
EVALUATION_QUEUE_TEXT_OPTION3 = """
|
| 89 |
# (Option 3) Pull Request
|
| 90 |
+
If Option 1 & 2 is unavailable, make [PR](https://huggingface.co/spaces/coms1580/test_space/discussions?new_pr=true) with [ADD_MODEL] prefix with contents as follows:
|
| 91 |
|
| 92 |
```
|
| 93 |
### Open-weight models:
|
src/config.py
CHANGED
|
@@ -2,34 +2,34 @@ ON_LOAD_COLUMNS_LANG = [
|
|
| 2 |
"Model Name",
|
| 3 |
"Group",
|
| 4 |
"Overall",
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
]
|
| 18 |
|
| 19 |
ON_LOAD_COLUMNS_CATEGORY = [
|
| 20 |
"Model Name",
|
| 21 |
"Group",
|
| 22 |
"Overall",
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
]
|
| 34 |
|
| 35 |
COLUMN_GROUP_LIST = [
|
|
|
|
| 2 |
"Model Name",
|
| 3 |
"Group",
|
| 4 |
"Overall",
|
| 5 |
+
"KO",
|
| 6 |
+
"EN",
|
| 7 |
+
"JA",
|
| 8 |
+
"ZH",
|
| 9 |
+
"PL",
|
| 10 |
+
"DE",
|
| 11 |
+
"PT",
|
| 12 |
+
"ES",
|
| 13 |
+
"FR",
|
| 14 |
+
"IT",
|
| 15 |
+
"RU",
|
| 16 |
+
"VI"
|
| 17 |
]
|
| 18 |
|
| 19 |
ON_LOAD_COLUMNS_CATEGORY = [
|
| 20 |
"Model Name",
|
| 21 |
"Group",
|
| 22 |
"Overall",
|
| 23 |
+
"Content Generation",
|
| 24 |
+
"Editing",
|
| 25 |
+
"Data Analysis",
|
| 26 |
+
"Reasoning",
|
| 27 |
+
"Hallucination",
|
| 28 |
+
"Safety",
|
| 29 |
+
"Repetition",
|
| 30 |
+
"Summarization",
|
| 31 |
+
"Translation",
|
| 32 |
+
"Multi-Turn"
|
| 33 |
]
|
| 34 |
|
| 35 |
COLUMN_GROUP_LIST = [
|
src/data/export_category_250618.csv
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
"Model Name" "Group" "Overall" "C1" "C2" "C3" "C4" "C5" "C6" "C7" "C8" "C9" "C10"
|
| 2 |
-
"M1" "G1" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
|
| 3 |
-
"M2" "G2" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
|
|
|
|
|
|
|
|
|
|
|
|
src/data/export_category_250709.csv
ADDED
|
Binary file (1.26 kB). View file
|
|
|
src/data/export_lang_250618.csv
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
"Model Name" "Group" "Overall" "L1" "L2" "L3" "L4" "L5" "L6" "L7" "L8" "L9" "L10" "L11" "L12"
|
| 2 |
-
"M1" "G1" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
|
| 3 |
-
"M2" "G2" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
|
|
|
|
|
|
|
|
|
|
|
|
src/data/export_lang_250709.csv
ADDED
|
Binary file (958 Bytes). View file
|
|
|
src/data_utils.py
CHANGED
|
@@ -3,12 +3,12 @@ from pathlib import Path
|
|
| 3 |
|
| 4 |
def get_dataframe_category():
|
| 5 |
abs_path = Path(__file__).parent
|
| 6 |
-
df = pd.read_csv(str(abs_path / "data/
|
| 7 |
df = df.sort_values("Overall", ascending=False)
|
| 8 |
return df
|
| 9 |
|
| 10 |
def get_dataframe_language():
|
| 11 |
abs_path = Path(__file__).parent
|
| 12 |
-
df = pd.read_csv(str(abs_path / "data/
|
| 13 |
df = df.sort_values("Overall", ascending=False)
|
| 14 |
return df
|
|
|
|
| 3 |
|
| 4 |
def get_dataframe_category():
|
| 5 |
abs_path = Path(__file__).parent
|
| 6 |
+
df = pd.read_csv(str(abs_path / "data/export_category_250709.csv"), encoding='utf-16', delimiter=" ")
|
| 7 |
df = df.sort_values("Overall", ascending=False)
|
| 8 |
return df
|
| 9 |
|
| 10 |
def get_dataframe_language():
|
| 11 |
abs_path = Path(__file__).parent
|
| 12 |
+
df = pd.read_csv(str(abs_path / "data/export_lang_250709.csv"), encoding='utf-16', delimiter=" ")
|
| 13 |
df = df.sort_values("Overall", ascending=False)
|
| 14 |
return df
|
src/display/css_html_js.py
CHANGED
|
@@ -1,5 +1,128 @@
|
|
| 1 |
custom_css = """
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
.markdown-text {
|
| 4 |
font-size: 16px !important;
|
| 5 |
}
|
|
@@ -22,7 +145,15 @@ custom_css = """
|
|
| 22 |
}
|
| 23 |
|
| 24 |
#leaderboard-table {
|
| 25 |
-
margin-top: 15px
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
#leaderboard-table-lite {
|
|
@@ -94,6 +225,53 @@ custom_css = """
|
|
| 94 |
#box-filter > .form{
|
| 95 |
border: 0
|
| 96 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
"""
|
| 98 |
|
| 99 |
get_window_url_params = """
|
|
@@ -103,3 +281,105 @@ get_window_url_params = """
|
|
| 103 |
return url_params;
|
| 104 |
}
|
| 105 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
custom_css = """
|
| 2 |
|
| 3 |
+
/* Sort arrow/button styles */
|
| 4 |
+
.sort-arrow, .sort-btn {
|
| 5 |
+
display: inline-flex;
|
| 6 |
+
align-items: center;
|
| 7 |
+
justify-content: center;
|
| 8 |
+
background: #23244a;
|
| 9 |
+
color: #ffd700 !important; /* ํญ์ ๋
ธ๋์ */
|
| 10 |
+
border: 1.5px solid #ffd700; /* ๊ธ์ ํ
๋๋ฆฌ */
|
| 11 |
+
border-radius: 6px;
|
| 12 |
+
font-size: 15px;
|
| 13 |
+
font-weight: 700;
|
| 14 |
+
margin-left: 6px;
|
| 15 |
+
margin-right: 2px;
|
| 16 |
+
padding: 2px 8px 2px 6px;
|
| 17 |
+
cursor: pointer;
|
| 18 |
+
transition: background 0.2s, color 0.2s, border 0.2s;
|
| 19 |
+
min-width: 28px;
|
| 20 |
+
min-height: 28px;
|
| 21 |
+
outline: none;
|
| 22 |
+
}
|
| 23 |
+
.sort-arrow.active, .sort-btn.active {
|
| 24 |
+
color: #ffd700 !important; /* ๊ธ์ */
|
| 25 |
+
border-color: #ffd700;
|
| 26 |
+
background: #1a237e;
|
| 27 |
+
}
|
| 28 |
+
.sort-arrow:hover, .sort-btn:hover {
|
| 29 |
+
background: #ffd700;
|
| 30 |
+
color: #23244a !important;
|
| 31 |
+
border-color: #ffd700;
|
| 32 |
+
}
|
| 33 |
+
.sort-arrow svg, .sort-btn svg {
|
| 34 |
+
margin-left: 2px;
|
| 35 |
+
margin-right: 0;
|
| 36 |
+
width: 1em;
|
| 37 |
+
height: 1em;
|
| 38 |
+
vertical-align: middle;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/* Enhanced leaderboard table styles */
|
| 42 |
+
.pretty-leaderboard-table {
|
| 43 |
+
width: 100%;
|
| 44 |
+
border-collapse: separate;
|
| 45 |
+
border-spacing: 0;
|
| 46 |
+
background: rgba(30, 34, 54, 0.98);
|
| 47 |
+
border-radius: 16px;
|
| 48 |
+
box-shadow: 0 4px 24px 0 rgba(16, 152, 247, 0.10), 0 1.5px 6px 0 rgba(227, 84, 84, 0.08);
|
| 49 |
+
overflow: hidden;
|
| 50 |
+
margin-bottom: 24px;
|
| 51 |
+
}
|
| 52 |
+
.pretty-leaderboard-table th, .pretty-leaderboard-table td {
|
| 53 |
+
padding: 12px 16px;
|
| 54 |
+
text-align: left;
|
| 55 |
+
border-bottom: 1px solid #23244a;
|
| 56 |
+
font-size: 15px;
|
| 57 |
+
}
|
| 58 |
+
.pretty-leaderboard-table th {
|
| 59 |
+
background: linear-gradient(90deg, #23244a 0%, #1a237e 100%);
|
| 60 |
+
color: #F5F6F7;
|
| 61 |
+
font-weight: 700;
|
| 62 |
+
letter-spacing: 0.5px;
|
| 63 |
+
border-bottom: 2px solid #1098F7;
|
| 64 |
+
}
|
| 65 |
+
.pretty-leaderboard-table tr:nth-child(even) {
|
| 66 |
+
background: rgba(245, 246, 247, 0.03);
|
| 67 |
+
}
|
| 68 |
+
.pretty-leaderboard-table tr:hover {
|
| 69 |
+
background: rgba(16, 152, 247, 0.08);
|
| 70 |
+
transition: background 0.2s;
|
| 71 |
+
}
|
| 72 |
+
.pretty-leaderboard-table td {
|
| 73 |
+
color: #F5F6F7;
|
| 74 |
+
vertical-align: middle;
|
| 75 |
+
}
|
| 76 |
+
.pretty-leaderboard-table tr:last-child td {
|
| 77 |
+
border-bottom: none;
|
| 78 |
+
}
|
| 79 |
+
.pretty-leaderboard-table th:first-child, .pretty-leaderboard-table td:first-child {
|
| 80 |
+
border-top-left-radius: 16px;
|
| 81 |
+
}
|
| 82 |
+
.pretty-leaderboard-table th:last-child, .pretty-leaderboard-table td:last-child {
|
| 83 |
+
border-top-right-radius: 16px;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
/* Enhanced score bar styles */
|
| 87 |
+
.score-bar {
|
| 88 |
+
display: flex;
|
| 89 |
+
align-items: center;
|
| 90 |
+
gap: 12px;
|
| 91 |
+
width: 100%;
|
| 92 |
+
}
|
| 93 |
+
.score-bar-track {
|
| 94 |
+
flex-grow: 1;
|
| 95 |
+
height: 10px;
|
| 96 |
+
background: rgba(245, 246, 247, 0.12);
|
| 97 |
+
border-radius: 5px;
|
| 98 |
+
overflow: hidden;
|
| 99 |
+
max-width: 220px;
|
| 100 |
+
box-shadow: 0 1px 4px 0 rgba(16, 152, 247, 0.10);
|
| 101 |
+
}
|
| 102 |
+
.score-bar-fill {
|
| 103 |
+
height: 100%;
|
| 104 |
+
background: linear-gradient(90deg, #E35454, #1098F7);
|
| 105 |
+
border-radius: 5px;
|
| 106 |
+
transition: width 0.3s cubic-bezier(0.4,0,0.2,1);
|
| 107 |
+
}
|
| 108 |
+
.score-bar-value {
|
| 109 |
+
font-family: 'SF Mono', monospace;
|
| 110 |
+
font-weight: 600;
|
| 111 |
+
color: #F5F6F7;
|
| 112 |
+
min-width: 60px;
|
| 113 |
+
font-size: 14px;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
body {
|
| 117 |
+
min-height: 100vh;
|
| 118 |
+
background: linear-gradient(135deg, #1a237e 0%, #311b92 100%);
|
| 119 |
+
background-image:
|
| 120 |
+
radial-gradient(rgba(255,255,255,0.12) 1.2px, transparent 1.2px),
|
| 121 |
+
radial-gradient(rgba(255,255,255,0.08) 1px, transparent 1px);
|
| 122 |
+
background-size: 40px 40px, 80px 80px;
|
| 123 |
+
background-position: 0 0, 20px 20px;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
.markdown-text {
|
| 127 |
font-size: 16px !important;
|
| 128 |
}
|
|
|
|
| 145 |
}
|
| 146 |
|
| 147 |
#leaderboard-table {
|
| 148 |
+
margin-top: 15px;
|
| 149 |
+
/* Space-themed background */
|
| 150 |
+
background: linear-gradient(135deg, #1a237e 0%, #311b92 100%);
|
| 151 |
+
position: relative;
|
| 152 |
+
background-image:
|
| 153 |
+
radial-gradient(rgba(255,255,255,0.15) 1.2px, transparent 1.2px),
|
| 154 |
+
radial-gradient(rgba(255,255,255,0.10) 1px, transparent 1px);
|
| 155 |
+
background-size: 40px 40px, 80px 80px;
|
| 156 |
+
background-position: 0 0, 20px 20px;
|
| 157 |
}
|
| 158 |
|
| 159 |
#leaderboard-table-lite {
|
|
|
|
| 225 |
#box-filter > .form{
|
| 226 |
border: 0
|
| 227 |
}
|
| 228 |
+
|
| 229 |
+
/* Model type and output form badge styles */
|
| 230 |
+
.badge {
|
| 231 |
+
display: inline-block;
|
| 232 |
+
border-radius: 12px;
|
| 233 |
+
padding: 2px 10px;
|
| 234 |
+
font-size: 0.85em;
|
| 235 |
+
font-weight: 700;
|
| 236 |
+
margin-left: 6px;
|
| 237 |
+
box-shadow: 0 1px 4px rgba(0,0,0,0.10);
|
| 238 |
+
vertical-align: middle;
|
| 239 |
+
}
|
| 240 |
+
.badge-open {
|
| 241 |
+
background: linear-gradient(90deg, #2196f3, #21cbf3);
|
| 242 |
+
color: #fff;
|
| 243 |
+
}
|
| 244 |
+
.badge-closed {
|
| 245 |
+
background: linear-gradient(90deg, #757575, #bdbdbd);
|
| 246 |
+
color: #fff;
|
| 247 |
+
}
|
| 248 |
+
.badge-normal {
|
| 249 |
+
background: linear-gradient(90deg, #43a047, #66bb6a);
|
| 250 |
+
color: #fff;
|
| 251 |
+
}
|
| 252 |
+
.badge-reasoning {
|
| 253 |
+
background: linear-gradient(90deg, #8e24aa, #d500f9);
|
| 254 |
+
color: #fff;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
/* Sort button styles */
|
| 258 |
+
.sort-btn {
|
| 259 |
+
background: #23244a;
|
| 260 |
+
color: #F5F6F7;
|
| 261 |
+
border: 1px solid #1098F7;
|
| 262 |
+
border-radius: 6px;
|
| 263 |
+
font-size: 13px;
|
| 264 |
+
font-weight: 700;
|
| 265 |
+
margin-left: 4px;
|
| 266 |
+
margin-right: 2px;
|
| 267 |
+
padding: 2px 7px;
|
| 268 |
+
cursor: pointer;
|
| 269 |
+
transition: background 0.2s, color 0.2s;
|
| 270 |
+
}
|
| 271 |
+
.sort-btn:hover {
|
| 272 |
+
background: #1098F7;
|
| 273 |
+
color: #fff;
|
| 274 |
+
}
|
| 275 |
"""
|
| 276 |
|
| 277 |
get_window_url_params = """
|
|
|
|
| 281 |
return url_params;
|
| 282 |
}
|
| 283 |
"""
|
| 284 |
+
|
| 285 |
+
def get_rank_badge(rank: int) -> str:
|
| 286 |
+
"""
|
| 287 |
+
Returns HTML for a rank badge (1st, 2nd, 3rd) with appropriate styling.
|
| 288 |
+
"""
|
| 289 |
+
badge_styles = {
|
| 290 |
+
1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
|
| 291 |
+
2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
|
| 292 |
+
3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
|
| 293 |
+
}
|
| 294 |
+
if rank in badge_styles:
|
| 295 |
+
label, gradient, text_color = badge_styles[rank]
|
| 296 |
+
return f'''
|
| 297 |
+
<div style="
|
| 298 |
+
display: inline-flex;
|
| 299 |
+
align-items: center;
|
| 300 |
+
justify-content: center;
|
| 301 |
+
min-width: 48px;
|
| 302 |
+
padding: 4px 12px;
|
| 303 |
+
background: {gradient};
|
| 304 |
+
color: {text_color};
|
| 305 |
+
border-radius: 6px;
|
| 306 |
+
font-weight: 700;
|
| 307 |
+
font-size: 1em;
|
| 308 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.18);
|
| 309 |
+
border: 1.5px solid #fff2;
|
| 310 |
+
">
|
| 311 |
+
{label}
|
| 312 |
+
</div>
|
| 313 |
+
'''
|
| 314 |
+
return f'''
|
| 315 |
+
<div style="
|
| 316 |
+
display: inline-flex;
|
| 317 |
+
align-items: center;
|
| 318 |
+
justify-content: center;
|
| 319 |
+
min-width: 28px;
|
| 320 |
+
color: #a1a1aa;
|
| 321 |
+
font-weight: 500;
|
| 322 |
+
">
|
| 323 |
+
{rank}
|
| 324 |
+
</div>
|
| 325 |
+
'''
|
| 326 |
+
|
| 327 |
+
def get_score_gauge(score: float, max_score: float = 1.0) -> str:
|
| 328 |
+
"""
|
| 329 |
+
Returns HTML for an overall score gauge (progress bar style).
|
| 330 |
+
"""
|
| 331 |
+
percent = min(max(score / max_score, 0), 1) * 100
|
| 332 |
+
return f'''
|
| 333 |
+
<div class="score-bar" style="margin: 0.5em 0;">
|
| 334 |
+
<div class="score-bar-track">
|
| 335 |
+
<div class="score-bar-fill" style="width: {percent}%;"></div>
|
| 336 |
+
</div>
|
| 337 |
+
<span class="score-bar-value">{score:.3f}</span>
|
| 338 |
+
</div>
|
| 339 |
+
'''
|
| 340 |
+
|
| 341 |
+
def get_leaderboard_table_html(df) -> str:
|
| 342 |
+
"""
|
| 343 |
+
Returns HTML for a pretty leaderboard table using badge and gauge.
|
| 344 |
+
Expects df to have columns: 'Model', 'Score', 'Model Type', 'Output Form'.
|
| 345 |
+
"""
|
| 346 |
+
def get_type_badge(model_type):
|
| 347 |
+
if model_type == "open":
|
| 348 |
+
return '<span class="badge badge-open">open</span>'
|
| 349 |
+
else:
|
| 350 |
+
return '<span class="badge badge-closed">closed</span>'
|
| 351 |
+
|
| 352 |
+
def get_output_badge(output_form):
|
| 353 |
+
if output_form == "reasoning":
|
| 354 |
+
return '<span class="badge badge-reasoning">reasoning</span>'
|
| 355 |
+
else:
|
| 356 |
+
return '<span class="badge badge-normal">normal</span>'
|
| 357 |
+
|
| 358 |
+
html = ['<table class="pretty-leaderboard-table">']
|
| 359 |
+
# Header
|
| 360 |
+
html.append(
|
| 361 |
+
"<thead><tr>"
|
| 362 |
+
"<th>Rank</th>"
|
| 363 |
+
"<th>Model</th>"
|
| 364 |
+
"<th>Overall Score</th>"
|
| 365 |
+
"</tr></thead>"
|
| 366 |
+
)
|
| 367 |
+
html.append("<tbody>")
|
| 368 |
+
for idx, row in enumerate(df.itertuples(index=False), 1):
|
| 369 |
+
model = getattr(row, "Model", "")
|
| 370 |
+
score = getattr(row, "Score", 0.0)
|
| 371 |
+
model_type = getattr(row, "Model_Type", getattr(row, "Model Type", "open"))
|
| 372 |
+
output_form = getattr(row, "Output_Form", getattr(row, "Output Form", "normal"))
|
| 373 |
+
badge = get_rank_badge(idx)
|
| 374 |
+
gauge = get_score_gauge(score)
|
| 375 |
+
type_badge = get_type_badge(model_type)
|
| 376 |
+
output_badge = get_output_badge(output_form)
|
| 377 |
+
html.append(
|
| 378 |
+
f"<tr>"
|
| 379 |
+
f"<td>{badge}</td>"
|
| 380 |
+
f"<td>{model} {type_badge} {output_badge}</td>"
|
| 381 |
+
f"<td>{gauge}</td>"
|
| 382 |
+
f"</tr>"
|
| 383 |
+
)
|
| 384 |
+
html.append("</tbody></table>")
|
| 385 |
+
return "\n".join(html)
|
src/display/formatting.py
CHANGED
|
@@ -25,3 +25,128 @@ def has_no_nan_values(df, columns):
|
|
| 25 |
|
| 26 |
def has_nan_values(df, columns):
|
| 27 |
return df[columns].isna().any(axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def has_nan_values(df, columns):
|
| 27 |
return df[columns].isna().any(axis=1)
|
| 28 |
+
|
| 29 |
+
def get_score_bar(score):
|
| 30 |
+
"""
|
| 31 |
+
Generate HTML for a score bar with gradient styling.
|
| 32 |
+
Expects score in the range 0-100.
|
| 33 |
+
"""
|
| 34 |
+
width = max(0, min(score, 100)) # Clamp to [0, 100]
|
| 35 |
+
return f"""
|
| 36 |
+
<div class="score-bar">
|
| 37 |
+
<div class="score-bar-track">
|
| 38 |
+
<div class="score-bar-fill" style="width: {width}%;"></div>
|
| 39 |
+
</div>
|
| 40 |
+
<span class="score-bar-value">{score:.3f}</span>
|
| 41 |
+
</div>
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
def render_leaderboard_html(df, overall_col="average"):
|
| 45 |
+
"""
|
| 46 |
+
Render a DataFrame as an HTML table, replacing the overall_col with a gauge bar.
|
| 47 |
+
"""
|
| 48 |
+
from .formatting import get_score_bar
|
| 49 |
+
from src.display.css_html_js import get_rank_badge
|
| 50 |
+
|
| 51 |
+
def get_type_badge(model_type):
|
| 52 |
+
if model_type == "open":
|
| 53 |
+
return '<span class="badge badge-open">open</span>'
|
| 54 |
+
else:
|
| 55 |
+
return '<span class="badge badge-closed">closed</span>'
|
| 56 |
+
|
| 57 |
+
def get_output_badge(output_form):
|
| 58 |
+
if output_form == "reasoning":
|
| 59 |
+
return '<span class="badge badge-reasoning">reasoning</span>'
|
| 60 |
+
else:
|
| 61 |
+
return '<span class="badge badge-normal">normal</span>'
|
| 62 |
+
|
| 63 |
+
# ์จ๊ธธ ์ปฌ๋ผ
|
| 64 |
+
hidden_cols = ["Model", "Model Type", "Output Form", "Rank"]
|
| 65 |
+
|
| 66 |
+
# Build table header
|
| 67 |
+
def get_sort_arrow(col, sort_col, sort_asc):
|
| 68 |
+
# "Model Name", "Group" ์ปฌ๋ผ์ ์ ์ธํ ๋ชจ๋ ์ปฌ๋ผ์ ์ ๋ ฌ ๋ฒํผ ๋
ธ์ถ
|
| 69 |
+
if col in {"Model Name", "Group"}:
|
| 70 |
+
return ""
|
| 71 |
+
# ํ๋์ ๋ฒํผ(โฒ ๋๋ โผ)๋ง ๋
ธ์ถ, ํด๋ฆญ ์ asc๊ฐ ๋ฐ์ ๋จ
|
| 72 |
+
if col == sort_col:
|
| 73 |
+
# ํ์ฌ ์ ๋ ฌ ์ํ์ ๋ฐ๋ผ ์์ด์ฝ๊ณผ data-asc๋ฅผ ๋ฐ์
|
| 74 |
+
if sort_asc:
|
| 75 |
+
# ์ค๋ฆ์ฐจ์ ์ํ: โผ ์์ด์ฝ, ํด๋ฆญ ์ ๋ด๋ฆผ์ฐจ์
|
| 76 |
+
svg = (
|
| 77 |
+
'<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
|
| 78 |
+
'<polygon points="3,5 11,5 7,11" fill="currentColor"/></svg>'
|
| 79 |
+
)
|
| 80 |
+
return (
|
| 81 |
+
f'<span class="sort-arrow active" data-col="{col}" data-asc="false" aria-label="๋ด๋ฆผ์ฐจ์ ์ ๋ ฌ">{svg}</span>'
|
| 82 |
+
)
|
| 83 |
+
else:
|
| 84 |
+
# ๋ด๋ฆผ์ฐจ์ ์ํ: โฒ ์์ด์ฝ, ํด๋ฆญ ์ ์ค๋ฆ์ฐจ์
|
| 85 |
+
svg = (
|
| 86 |
+
'<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
|
| 87 |
+
'<polygon points="7,3 11,9 3,9" fill="currentColor"/></svg>'
|
| 88 |
+
)
|
| 89 |
+
return (
|
| 90 |
+
f'<span class="sort-arrow active" data-col="{col}" data-asc="true" aria-label="์ค๋ฆ์ฐจ์ ์ ๋ ฌ">{svg}</span>'
|
| 91 |
+
)
|
| 92 |
+
else:
|
| 93 |
+
# ์ ๋ ฌ ์ค์ด ์๋ ์ปฌ๋ผ: โฒ(์ค๋ฆ์ฐจ์) ์์ด์ฝ, ํด๋ฆญ ์ ์ค๋ฆ์ฐจ์
|
| 94 |
+
svg = (
|
| 95 |
+
'<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
|
| 96 |
+
'<polygon points="7,3 11,9 3,9" fill="currentColor"/></svg>'
|
| 97 |
+
)
|
| 98 |
+
return (
|
| 99 |
+
f'<span class="sort-arrow" data-col="{col}" data-asc="true" aria-label="์ค๋ฆ์ฐจ์ ์ ๋ ฌ">{svg}</span>'
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# ์ ๋ ฌ ์ํ ์ถ์ถ (State์์ ์ ๋ฌ๋ฐ๊ฑฐ๋ ๊ธฐ๋ณธ๊ฐ)
|
| 103 |
+
sort_col = getattr(df, "_sort_col", None) or (df.columns[0] if len(df.columns) > 0 else None)
|
| 104 |
+
sort_asc = getattr(df, "_sort_asc", None)
|
| 105 |
+
if sort_asc is None:
|
| 106 |
+
sort_asc = True
|
| 107 |
+
|
| 108 |
+
html = '<table class="pretty-leaderboard-table">\n<thead><tr>'
|
| 109 |
+
for col in df.columns:
|
| 110 |
+
if col in hidden_cols:
|
| 111 |
+
continue
|
| 112 |
+
html += f'<th>{col}{get_sort_arrow(col, sort_col, sort_asc)}</th>'
|
| 113 |
+
html += '</tr></thead>\n<tbody>\n'
|
| 114 |
+
|
| 115 |
+
# Build table rows
|
| 116 |
+
for idx, row in df.iterrows():
|
| 117 |
+
html += '<tr>'
|
| 118 |
+
for col in df.columns:
|
| 119 |
+
if col in hidden_cols:
|
| 120 |
+
continue
|
| 121 |
+
cell = row[col]
|
| 122 |
+
if col == overall_col:
|
| 123 |
+
try:
|
| 124 |
+
cell_html = get_score_bar(float(cell))
|
| 125 |
+
except Exception:
|
| 126 |
+
cell_html = str(cell)
|
| 127 |
+
html += f'<td>{cell_html}</td>'
|
| 128 |
+
elif col in ["Model Name"]:
|
| 129 |
+
# 1~3์ ํ์ด๋ผ์ดํธ + 4๋ฑ ์ดํ ํฐ์ + ๋ฑ์ง ํญ์ ํ์
|
| 130 |
+
rank = row.get("Rank", None)
|
| 131 |
+
model_type = row.get("Model Type", None) or row.get("Model_Type", None)
|
| 132 |
+
output_form = row.get("Output Form", None) or row.get("Output_Form", None)
|
| 133 |
+
highlight_style = ""
|
| 134 |
+
if rank == 1 or rank == "1":
|
| 135 |
+
highlight_style = "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;"
|
| 136 |
+
elif rank == 2 or rank == "2":
|
| 137 |
+
highlight_style = "color: #b0b0b0; font-weight: bold;"
|
| 138 |
+
elif rank == 3 or rank == "3":
|
| 139 |
+
highlight_style = "color: #cd7f32; font-weight: bold;"
|
| 140 |
+
else:
|
| 141 |
+
highlight_style = "color: #fff; font-weight: 600;"
|
| 142 |
+
badge_html = ""
|
| 143 |
+
if model_type:
|
| 144 |
+
badge_html += " " + get_type_badge(model_type)
|
| 145 |
+
if output_form:
|
| 146 |
+
badge_html += " " + get_output_badge(output_form)
|
| 147 |
+
html += f'<td><span style="{highlight_style}">{cell}</span>{badge_html}</td>'
|
| 148 |
+
else:
|
| 149 |
+
html += f'<td>{cell}</td>'
|
| 150 |
+
html += '</tr>\n'
|
| 151 |
+
html += '</tbody></table>'
|
| 152 |
+
return html
|
src/display/utils.py
CHANGED
|
@@ -21,24 +21,26 @@ class ColumnContent:
|
|
| 21 |
never_hidden: bool = False
|
| 22 |
|
| 23 |
## Leaderboard columns
|
|
|
|
|
|
|
| 24 |
auto_eval_column_dict = []
|
| 25 |
# Init
|
| 26 |
-
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
| 27 |
-
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
| 28 |
-
#Scores
|
| 29 |
-
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average โฌ๏ธ", "number", True)])
|
| 30 |
for task in Tasks:
|
| 31 |
-
auto_eval_column_dict.append([task.name, ColumnContent,
|
| 32 |
# Model information
|
| 33 |
-
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
| 34 |
-
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
| 35 |
-
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
| 36 |
-
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
| 37 |
-
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
| 38 |
-
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
| 39 |
-
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub โค๏ธ", "number", False)])
|
| 40 |
-
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
| 41 |
-
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 42 |
|
| 43 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 44 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
@@ -113,4 +115,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
|
|
| 113 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
| 114 |
|
| 115 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
| 116 |
-
|
|
|
|
| 21 |
never_hidden: bool = False
|
| 22 |
|
| 23 |
## Leaderboard columns
|
| 24 |
+
from dataclasses import field
|
| 25 |
+
|
| 26 |
auto_eval_column_dict = []
|
| 27 |
# Init
|
| 28 |
+
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, field(default_factory=lambda: ColumnContent("T", "str", True, never_hidden=True))])
|
| 29 |
+
auto_eval_column_dict.append(["model", ColumnContent, field(default_factory=lambda: ColumnContent("Model", "markdown", True, never_hidden=True))])
|
| 30 |
+
# Scores
|
| 31 |
+
auto_eval_column_dict.append(["average", ColumnContent, field(default_factory=lambda: ColumnContent("Average โฌ๏ธ", "number", True))])
|
| 32 |
for task in Tasks:
|
| 33 |
+
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=task: ColumnContent(t.value.col_name, "number", True))])
|
| 34 |
# Model information
|
| 35 |
+
auto_eval_column_dict.append(["model_type", ColumnContent, field(default_factory=lambda: ColumnContent("Type", "str", False))])
|
| 36 |
+
auto_eval_column_dict.append(["architecture", ColumnContent, field(default_factory=lambda: ColumnContent("Architecture", "str", False))])
|
| 37 |
+
auto_eval_column_dict.append(["weight_type", ColumnContent, field(default_factory=lambda: ColumnContent("Weight type", "str", False, True))])
|
| 38 |
+
auto_eval_column_dict.append(["precision", ColumnContent, field(default_factory=lambda: ColumnContent("Precision", "str", False))])
|
| 39 |
+
auto_eval_column_dict.append(["license", ColumnContent, field(default_factory=lambda: ColumnContent("Hub License", "str", False))])
|
| 40 |
+
auto_eval_column_dict.append(["params", ColumnContent, field(default_factory=lambda: ColumnContent("#Params (B)", "number", False))])
|
| 41 |
+
auto_eval_column_dict.append(["likes", ColumnContent, field(default_factory=lambda: ColumnContent("Hub โค๏ธ", "number", False))])
|
| 42 |
+
auto_eval_column_dict.append(["still_on_hub", ColumnContent, field(default_factory=lambda: ColumnContent("Available on the hub", "bool", False))])
|
| 43 |
+
auto_eval_column_dict.append(["revision", ColumnContent, field(default_factory=lambda: ColumnContent("Model sha", "str", False, False))])
|
| 44 |
|
| 45 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 46 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
|
| 115 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
| 116 |
|
| 117 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
|
|
src/envs.py
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
|
| 8 |
|
| 9 |
-
OWNER = "
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
REPO_ID = f"{OWNER}/test_space"
|
|
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
|
| 8 |
|
| 9 |
+
OWNER = "coms1580" # Change to your org - don't forget to create a results and request dataset, with the correct format!
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
REPO_ID = f"{OWNER}/test_space"
|
src/submission/check_validity.py
CHANGED
|
@@ -95,6 +95,6 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
|
|
| 95 |
if info["model"].count("/") == 0 or "submitted_time" not in info:
|
| 96 |
continue
|
| 97 |
organisation, _ = info["model"].split("/")
|
| 98 |
-
users_to_submission_dates[organisation].extend([{"benchmark": info['benchmark'], "submitted_time": info["submitted_time"]}])
|
| 99 |
|
| 100 |
return set(file_names), users_to_submission_dates
|
|
|
|
| 95 |
if info["model"].count("/") == 0 or "submitted_time" not in info:
|
| 96 |
continue
|
| 97 |
organisation, _ = info["model"].split("/")
|
| 98 |
+
users_to_submission_dates[organisation].extend([{"benchmark": info['benchmark'], "model": info["model"], "submitted_time": info["submitted_time"]}])
|
| 99 |
|
| 100 |
return set(file_names), users_to_submission_dates
|
src/submission/submit.py
CHANGED
|
@@ -56,7 +56,7 @@ def add_new_eval_option1(
|
|
| 56 |
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 57 |
if hours_diff <= 24:
|
| 58 |
submission_cnt += 1
|
| 59 |
-
if submission_cnt >
|
| 60 |
return styled_error("The organization already submitted three times for this benchmark today.")
|
| 61 |
|
| 62 |
# Does the model actually exist?
|
|
@@ -127,8 +127,14 @@ def add_new_eval_option1(
|
|
| 127 |
}
|
| 128 |
|
| 129 |
# Check for duplicate submission
|
| 130 |
-
if
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
print("Creating eval file")
|
| 134 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
|
@@ -193,14 +199,13 @@ def add_new_eval_option2(
|
|
| 193 |
return styled_error("The submitter does not have submission rights for this model.")
|
| 194 |
|
| 195 |
# Does the organization submit more than three times in a day?
|
| 196 |
-
print(USERS_TO_SUBMISSION_DATES)
|
| 197 |
submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark]
|
| 198 |
submission_cnt = 0
|
| 199 |
for i in range(len(submission_times)):
|
| 200 |
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 201 |
if hours_diff <= 24:
|
| 202 |
submission_cnt += 1
|
| 203 |
-
if submission_cnt >
|
| 204 |
return styled_error("The organization already submitted three times for this benchmark today.")
|
| 205 |
|
| 206 |
# Does the model actually exist?
|
|
@@ -271,8 +276,14 @@ def add_new_eval_option2(
|
|
| 271 |
}
|
| 272 |
|
| 273 |
# Check for duplicate submission
|
| 274 |
-
if
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
print("Creating eval file")
|
| 278 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
|
|
|
| 56 |
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 57 |
if hours_diff <= 24:
|
| 58 |
submission_cnt += 1
|
| 59 |
+
if submission_cnt > 3:
|
| 60 |
return styled_error("The organization already submitted three times for this benchmark today.")
|
| 61 |
|
| 62 |
# Does the model actually exist?
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
# Check for duplicate submission
|
| 130 |
+
submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark and item['model'] == model]
|
| 131 |
+
submission_cnt = 0
|
| 132 |
+
for i in range(len(submission_times)):
|
| 133 |
+
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 134 |
+
if hours_diff <= 24:
|
| 135 |
+
submission_cnt += 1
|
| 136 |
+
if submission_cnt > 1:
|
| 137 |
+
return styled_warning("This model has been already submitted within 24 hours.")
|
| 138 |
|
| 139 |
print("Creating eval file")
|
| 140 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
|
|
|
| 199 |
return styled_error("The submitter does not have submission rights for this model.")
|
| 200 |
|
| 201 |
# Does the organization submit more than three times in a day?
|
|
|
|
| 202 |
submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark]
|
| 203 |
submission_cnt = 0
|
| 204 |
for i in range(len(submission_times)):
|
| 205 |
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 206 |
if hours_diff <= 24:
|
| 207 |
submission_cnt += 1
|
| 208 |
+
if submission_cnt > 3:
|
| 209 |
return styled_error("The organization already submitted three times for this benchmark today.")
|
| 210 |
|
| 211 |
# Does the model actually exist?
|
|
|
|
| 276 |
}
|
| 277 |
|
| 278 |
# Check for duplicate submission
|
| 279 |
+
submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark and item['model'] == model]
|
| 280 |
+
submission_cnt = 0
|
| 281 |
+
for i in range(len(submission_times)):
|
| 282 |
+
hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
|
| 283 |
+
if hours_diff <= 24:
|
| 284 |
+
submission_cnt += 1
|
| 285 |
+
if submission_cnt > 1:
|
| 286 |
+
return styled_warning("This model has been already submitted within 24 hours.")
|
| 287 |
|
| 288 |
print("Creating eval file")
|
| 289 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
ui.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import src.config as configs
|
| 3 |
+
from constants import TAB_NAMES, MODEL_TYPE_MAP, OUTPUT_FORM_MAP
|
| 4 |
+
from src.display.formatting import render_leaderboard_html
|
| 5 |
+
from src.display.css_html_js import get_leaderboard_table_html, custom_css
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from constants import LEADERBOARD_REQUIRED_COLUMNS
|
| 8 |
+
|
| 9 |
+
def render_pretty_leaderboard_html(df):
|
| 10 |
+
"""
|
| 11 |
+
Renders a pretty leaderboard table using badge and gauge.
|
| 12 |
+
Supports both ['Model', 'Score'] and ['Model Name', 'Overall'] columns.
|
| 13 |
+
Sorts by score descending and rounds for display.
|
| 14 |
+
"""
|
| 15 |
+
# Flexible column mapping
|
| 16 |
+
col_map = {}
|
| 17 |
+
if "Model" in df.columns:
|
| 18 |
+
col_map["Model"] = "Model"
|
| 19 |
+
elif "Model Name" in df.columns:
|
| 20 |
+
col_map["Model"] = "Model Name"
|
| 21 |
+
else:
|
| 22 |
+
return "<div style='color:red'>DataFrame must have a 'Model' or 'Model Name' column.</div>"
|
| 23 |
+
if "Score" in df.columns:
|
| 24 |
+
col_map["Score"] = "Score"
|
| 25 |
+
elif "Overall" in df.columns:
|
| 26 |
+
col_map["Score"] = "Overall"
|
| 27 |
+
else:
|
| 28 |
+
return "<div style='color:red'>DataFrame must have a 'Score' or 'Overall' column.</div>"
|
| 29 |
+
|
| 30 |
+
# Example mappings for demonstration (expand as needed)
|
| 31 |
+
model_type_map = MODEL_TYPE_MAP
|
| 32 |
+
output_form_map = OUTPUT_FORM_MAP
|
| 33 |
+
|
| 34 |
+
# Copy and rename for uniformity
|
| 35 |
+
df2 = df.copy()
|
| 36 |
+
df2 = df2.rename(columns={col_map["Model"]: "Model", col_map["Score"]: "Score"})
|
| 37 |
+
|
| 38 |
+
# ๋งคํ ์ ํ๋ก ๋๋ฝ๋ ๋ชจ๋ธ๋ช
์ ์ถ๋ ฅ (๋๋ฒ๊น
์ฉ)
|
| 39 |
+
missing_type = set(df2["Model"]) - set(model_type_map.keys())
|
| 40 |
+
missing_output = set(df2["Model"]) - set(output_form_map.keys())
|
| 41 |
+
if missing_type:
|
| 42 |
+
print("Model Type ๋งคํ ๋๋ฝ:", missing_type)
|
| 43 |
+
if missing_output:
|
| 44 |
+
print("Output Form ๋งคํ ๋๋ฝ:", missing_output)
|
| 45 |
+
|
| 46 |
+
# Add badge columns
|
| 47 |
+
df2["Model Type"] = df2["Model"].map(model_type_map).fillna("open")
|
| 48 |
+
df2["Output Form"] = df2["Model"].map(output_form_map).fillna("normal")
|
| 49 |
+
# Drop NA, sort, round
|
| 50 |
+
df2 = df2[["Model", "Score", "Model Type", "Output Form"]].dropna()
|
| 51 |
+
df2["Score"] = pd.to_numeric(df2["Score"], errors="coerce").round(2)
|
| 52 |
+
df2 = df2.sort_values("Score", ascending=False).reset_index(drop=True)
|
| 53 |
+
|
| 54 |
+
return get_leaderboard_table_html(df2)
|
| 55 |
+
|
| 56 |
+
def create_leaderboard_tab(df, key, search_leaderboard, update_modelselector_group, update_leaderboard, column_selector_value):
|
| 57 |
+
"""
|
| 58 |
+
df: DataFrame to display
|
| 59 |
+
key: "Category" or "Language"
|
| 60 |
+
search_leaderboard, update_modelselector_group, update_leaderboard: handler functions
|
| 61 |
+
column_selector_value: default columns to select
|
| 62 |
+
"""
|
| 63 |
+
with gr.TabItem(
|
| 64 |
+
TAB_NAMES[key],
|
| 65 |
+
visible=True
|
| 66 |
+
):
|
| 67 |
+
df_state = gr.State(df)
|
| 68 |
+
|
| 69 |
+
with gr.Row():
|
| 70 |
+
with gr.Column():
|
| 71 |
+
search_box = gr.Textbox(label="Search Model by Name")
|
| 72 |
+
group_list = df["Group"].unique().tolist()
|
| 73 |
+
group_selector = gr.CheckboxGroup(
|
| 74 |
+
choices=df["Group"].unique().tolist(),
|
| 75 |
+
value=group_list,
|
| 76 |
+
label="Select Model Group"
|
| 77 |
+
)
|
| 78 |
+
# ํ์ ์ปฌ๋ผ ํญ์ ํฌํจ, ์ฒดํฌ ํด์ ๋ถ๊ฐ(disabled)
|
| 79 |
+
# ์ ํ์ง์์ "Model Name", "Group", "Overall" ์ ์ธ
|
| 80 |
+
exclude_cols = {"Model Name", "Group", "Overall"}
|
| 81 |
+
selectable_columns = [col for col in df.columns.tolist()[3:] if col not in exclude_cols]
|
| 82 |
+
all_columns = list(dict.fromkeys(LEADERBOARD_REQUIRED_COLUMNS + selectable_columns))
|
| 83 |
+
column_selector = gr.CheckboxGroup(
|
| 84 |
+
choices=selectable_columns,
|
| 85 |
+
value=[col for col in column_selector_value if col in selectable_columns],
|
| 86 |
+
label="Select Columns"
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
with gr.Column():
|
| 90 |
+
with gr.Accordion("Model List", open=False):
|
| 91 |
+
model_group = df["Model Name"].tolist()
|
| 92 |
+
model_selector = gr.CheckboxGroup(
|
| 93 |
+
choices=df["Model Name"].tolist(),
|
| 94 |
+
value=model_group,
|
| 95 |
+
label="Select Models"
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
# badge ์ ๋ณด ํฌํจ DataFrame ์์ฑ (์์ชฝ ํ
์ด๋ธ์ฉ)
|
| 99 |
+
df_badge = df.copy()
|
| 100 |
+
# Model ์ปฌ๋ผ๋ช
ํต์ผ
|
| 101 |
+
if "Model Name" in df_badge.columns:
|
| 102 |
+
df_badge["Model"] = df_badge["Model Name"]
|
| 103 |
+
# ์์ ๋งคํ (์๋์ชฝ๊ณผ ๋์ผํ๊ฒ ํ์ฅ)
|
| 104 |
+
model_type_map = MODEL_TYPE_MAP
|
| 105 |
+
output_form_map = OUTPUT_FORM_MAP
|
| 106 |
+
df_badge["Model Type"] = df_badge["Model"].map(model_type_map).fillna("open")
|
| 107 |
+
df_badge["Output Form"] = df_badge["Model"].map(output_form_map).fillna("normal")
|
| 108 |
+
df_badge = df_badge.sort_values("Overall" if "Overall" in df_badge.columns else "Score", ascending=False).reset_index(drop=True)
|
| 109 |
+
df_badge["Rank"] = df_badge.index + 1
|
| 110 |
+
|
| 111 |
+
# ์ ๋ ฌ ์ํ ๊ด๋ฆฌ์ฉ State (ํ ๋ฒ๋ง ์์ฑ, ์ดํ ์ฌ์ฌ์ฉ)
|
| 112 |
+
default_sort_col = "Overall" if "Overall" in df_badge.columns else "Score"
|
| 113 |
+
sort_col_state = gr.State(default_sort_col)
|
| 114 |
+
sort_asc_state = gr.State(False) # ๋ด๋ฆผ์ฐจ์์ด ๊ธฐ๋ณธ๊ฐ
|
| 115 |
+
|
| 116 |
+
# ์ ๋ ฌ ํจ์ (JS์์ ๋๊ธด asc ๊ฐ์ ๊ทธ๋๋ก ์ฌ์ฉ)
|
| 117 |
+
def sort_and_render(col, asc, models, columns, df_):
|
| 118 |
+
print(f"[sort_and_render] called: col={col}, asc={asc}, models={models}, columns={columns}")
|
| 119 |
+
filtered_df = update_leaderboard(models, columns, df_, col, asc)
|
| 120 |
+
# ์ ๋ ฌ ์ํ๋ฅผ DataFrame์ ์์๋ก ์ ์ฅํด ํค๋์ ๋ฐ์
|
| 121 |
+
filtered_df._sort_col = col
|
| 122 |
+
filtered_df._sort_asc = asc
|
| 123 |
+
return render_leaderboard_html(filtered_df.round(3)), col, asc
|
| 124 |
+
|
| 125 |
+
leaderboard_html = render_leaderboard_html(df_badge.round(3))
|
| 126 |
+
leaderboard_html_comp = gr.HTML(value=leaderboard_html, elem_id="leaderboard-table")
|
| 127 |
+
|
| 128 |
+
# ์ ๋ ฌ ํธ๋ฆฌ๊ฑฐ์ฉ hidden textbox ์ถ๊ฐ
|
| 129 |
+
sort_trigger = gr.Textbox(visible=False, elem_id="sort-leaderboard-trigger")
|
| 130 |
+
|
| 131 |
+
# sort-arrow ํด๋ฆญ ์ ํญ์ ์๋ก์ด ๊ฐ์ผ๋ก value๋ฅผ ๋ณ๊ฒฝํ๋ JS ์ฝ์
(์ ๋ ฌ ๋ฐฉํฅ ํฌํจ)
|
| 132 |
+
sort_js = """
|
| 133 |
+
<script>
|
| 134 |
+
(function() {
|
| 135 |
+
document.addEventListener('DOMContentLoaded', function() {
|
| 136 |
+
const table = document.getElementById('leaderboard-table');
|
| 137 |
+
if (!table) return;
|
| 138 |
+
table.addEventListener('click', function(e) {
|
| 139 |
+
const arrow = e.target.closest('.sort-arrow');
|
| 140 |
+
if (arrow) {
|
| 141 |
+
const col = arrow.getAttribute('data-col');
|
| 142 |
+
const asc = arrow.getAttribute('data-asc');
|
| 143 |
+
// ํญ์ ์๋ก์ด ๊ฐ์ผ๋ก value๋ฅผ ๋ณ๊ฒฝํ์ฌ change ์ด๋ฒคํธ ๊ฐ์ ๋ฐ์
|
| 144 |
+
const trigger = document.querySelector('#sort-leaderboard-trigger input');
|
| 145 |
+
if (trigger) {
|
| 146 |
+
trigger.value = col + '|' + asc + '|' + Date.now();
|
| 147 |
+
trigger.dispatchEvent(new Event('input', { bubbles: true }));
|
| 148 |
+
trigger.dispatchEvent(new Event('change', { bubbles: true }));
|
| 149 |
+
}
|
| 150 |
+
}
|
| 151 |
+
});
|
| 152 |
+
});
|
| 153 |
+
})();
|
| 154 |
+
</script>
|
| 155 |
+
"""
|
| 156 |
+
# ์ ๋ ฌ ๋ฒํผ ํด๋ฆญ ์์๋ update_leaderboard๋ฅผ ํธ์ถํ๋๋ก wiring
|
| 157 |
+
def sort_trigger_change(col_val, models, columns, df_, prev_col, prev_asc):
    """Parse the ``col|asc|timestamp`` payload from the JS sort trigger and re-render.

    The hidden trigger textbox can fire with an empty or malformed value (e.g.
    its initial value); in that case fall back to the previous sort state
    (*prev_col*, *prev_asc*) instead of raising IndexError on the split.
    """
    print(f"[sort_trigger.change] col_val={col_val}, prev_col={prev_col}, prev_asc={prev_asc}")
    parts = (col_val or "").split('|')
    if len(parts) >= 2 and parts[0]:
        col = parts[0]
        asc = parts[1].lower() == "true"
    else:
        # Malformed trigger payload: keep the current sort untouched.
        col, asc = prev_col, prev_asc
    return sort_and_render(col, asc, models, columns, df_)
|
| 161 |
+
|
| 162 |
+
sort_trigger.change(
|
| 163 |
+
fn=sort_trigger_change,
|
| 164 |
+
inputs=[sort_trigger, model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
|
| 165 |
+
outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
# ์ปค์คํ
JS๋ฅผ ์๋จ ํ
์ด๋ธ์ ์ฝ์
|
| 169 |
+
leaderboard_html_comp.style = None # gr.HTML์๋ style ํ๋ผ๋ฏธํฐ๊ฐ ์์ผ๋ฏ๋ก, ์๋์์ ์ฝ์
|
| 170 |
+
leaderboard_html_comp.value += sort_js
|
| 171 |
+
|
| 172 |
+
# Pretty leaderboard preview (uses only 'Model' and 'Score' columns)
|
| 173 |
+
pretty_html = gr.HTML(value=render_pretty_leaderboard_html(df.round(3)))
|
| 174 |
+
|
| 175 |
+
# Define change functions for user interaction
|
| 176 |
+
# ๋ชจ๋ UI ์ด๋ฒคํธ์์ update_leaderboard โ sort_and_render โ render_leaderboard_html ์์ผ๋ก ๊ฐฑ์
|
| 177 |
+
def filter_and_sort_search(query, df, sort_col, sort_asc):
    """Filter the leaderboard by the search *query*, keeping the current sort."""
    print(f"[filter_and_sort_search] sort_col={sort_col}, sort_asc={sort_asc}")
    view = search_leaderboard(query, df, sort_col, sort_asc)
    # Record the active sort state on the frame so the rendered header stays in sync.
    view._sort_col = sort_col
    view._sort_asc = sort_asc
    return render_leaderboard_html(view), sort_col, sort_asc
|
| 184 |
+
|
| 185 |
+
def filter_and_sort_model(models, columns, df, sort_col, sort_asc):
    """Re-filter on a model-selection change, preserving the current sort state."""
    print(f"[filter_and_sort_model] sort_col={sort_col}, sort_asc={sort_asc}")
    view = update_leaderboard(models, columns, df, sort_col, sort_asc)
    # Keep the active sort state on the frame for the header renderer.
    view._sort_col = sort_col
    view._sort_asc = sort_asc
    return render_leaderboard_html(view), sort_col, sort_asc
|
| 191 |
+
|
| 192 |
+
def filter_and_sort_column(models, columns, df, sort_col, sort_asc):
    """Re-filter on a column-selection change, preserving the current sort state."""
    print(f"[filter_and_sort_column] sort_col={sort_col}, sort_asc={sort_asc}")
    view = update_leaderboard(models, columns, df, sort_col, sort_asc)
    # Keep the active sort state on the frame for the header renderer.
    view._sort_col = sort_col
    view._sort_asc = sort_asc
    return render_leaderboard_html(view), sort_col, sort_asc
|
| 198 |
+
|
| 199 |
+
search_box.change(
|
| 200 |
+
fn=filter_and_sort_search,
|
| 201 |
+
inputs=[search_box, df_state, sort_col_state, sort_asc_state],
|
| 202 |
+
outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
group_selector.change(fn=update_modelselector_group, inputs=[group_selector, df_state], outputs=model_selector)
|
| 206 |
+
model_selector.change(
|
| 207 |
+
fn=filter_and_sort_model,
|
| 208 |
+
inputs=[model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
|
| 209 |
+
outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
# column_selector ๋ณ๊ฒฝ ์์๋ ํญ์ ์ต์ sort_col, sort_asc๋ฅผ ์ ์ง
|
| 213 |
+
column_selector.change(
|
| 214 |
+
fn=filter_and_sort_column,
|
| 215 |
+
inputs=[model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
|
| 216 |
+
outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
return {
|
| 220 |
+
"search_box": search_box,
|
| 221 |
+
"group_selector": group_selector,
|
| 222 |
+
"column_selector": column_selector,
|
| 223 |
+
"model_selector": model_selector,
|
| 224 |
+
"leaderboard_html_comp": leaderboard_html_comp,
|
| 225 |
+
"sort_trigger": sort_trigger,
|
| 226 |
+
"df_state": df_state,
|
| 227 |
+
"pretty_html": pretty_html
|
| 228 |
+
}
|
utils.py
CHANGED
|
@@ -25,4 +25,18 @@ def get_profile_and_organizations(profile: gr.OAuthProfile | None, oauth_token:
|
|
| 25 |
else:
|
| 26 |
output_org = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
|
| 27 |
|
| 28 |
-
return output_profile, output_org
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
else:
|
| 26 |
output_org = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
|
| 27 |
|
| 28 |
+
return output_profile, output_org
|
| 29 |
+
|
| 30 |
+
def download_with_restart(snapshot_download_func, repo_id, local_dir, repo_type, token, restart_func):
    """Download a snapshot of *repo_id* into *local_dir*, restarting on failure.

    Parameters
    ----------
    snapshot_download_func : callable
        ``huggingface_hub.snapshot_download``-compatible downloader; called
        with keyword arguments only.
    repo_id : str
        Repository to fetch (e.g. ``"org/name"``).
    local_dir : str
        Destination directory for the snapshot.
    repo_type : str
        Repository type (``"model"``, ``"dataset"``, ``"space"``).
    token : str | None
        Auth token forwarded to the downloader.
    restart_func : callable
        Zero-argument fallback invoked when the download fails (typically a
        Space restart).
    """
    try:
        print(local_dir)
        snapshot_download_func(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type=repo_type,
            tqdm_class=None,
            etag_timeout=30,
            token=token,
        )
    except Exception:
        # Log the failure before restarting: the previous version silently
        # swallowed the traceback, making download errors undiagnosable.
        import traceback
        traceback.print_exc()
        restart_func()
|