Kyuho Heo committed
Commit e74285c · 1 Parent(s): 2b726be

spacerank
.gitattributes CHANGED
@@ -25,6 +25,7 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -32,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,49 +1,14 @@
 ---
-title: Leaderboard
-emoji: 🥇
+title: test_space
+emoji: 👀
 colorFrom: green
-colorTo: indigo
+colorTo: red
 sdk: gradio
+sdk_version: 5.38.0
 app_file: app.py
-pinned: true
+pinned: false
 license: apache-2.0
-short_description: Leaderboards for evaluating LLMs
-sdk_version: 5.19.0
 hf_oauth: true
-hf_oauth_scopes:
-  - read-repos
 ---
 
-# Start the configuration
-
-Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
-
-Results files should have the following format and be stored as JSON files:
-```json
-{
-    "config": {
-        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
-        "model_name": "path of the model on the hub: org/model",
-        "model_sha": "revision on the hub",
-    },
-    "results": {
-        "task_name": {
-            "metric_name": score,
-        },
-        "task_name2": {
-            "metric_name": score,
-        }
-    }
-}
-```
-
-Request files are created automatically by this tool.
-
-If you encounter a problem on the Space, don't hesitate to restart it to remove the created eval-queue, eval-queue-bk, eval-results, and eval-results-bk folders.
-
-# Code logic for more complex edits
-
-You'll find
-- the main table's column names and properties in `src/display/utils.py`
-- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
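For reference, a results file in the schema described by the removed README can be generated with a short script. This is a minimal sketch; the file name and scores are illustrative, not part of the repo:

```python
# Minimal sketch: writing a results file in the schema the removed README
# describes. Path and metric values are illustrative.
import json

result = {
    "config": {
        "model_dtype": "torch.float16",
        "model_name": "org/model",   # hub path of the model
        "model_sha": "main",         # revision on the hub
    },
    "results": {
        "task_name": {"metric_name": 0.731},
        "task_name2": {"metric_name": 0.682},
    },
}

with open("results_org_model.json", "w", encoding="utf-8") as f:
    json.dump(result, f, indent=2)
```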
app.py CHANGED
@@ -6,8 +6,8 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 from src.data_utils import get_dataframe_category, get_dataframe_language
 import src.config as configs
-from utils import get_profile, get_organizations, get_profile_and_organizations
-from typing import Optional, Tuple
+from utils import get_profile, get_organizations, get_profile_and_organizations, download_with_restart
+
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -36,24 +36,36 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval_option1, add_new_eval_option2
 
 
+from handlers import (
+    search_leaderboard,
+    update_modelselector_group,
+    update_columnselector_group,
+    update_leaderboard,
+    get_models_by_group,
+)
+from ui import create_leaderboard_tab
+from constants import TAB_KEYS, TAB_NAMES, VLLM_VERSIONS
+
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 ### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
+download_with_restart(
+    snapshot_download,
+    repo_id=QUEUE_REPO,
+    local_dir=EVAL_REQUESTS_PATH,
+    repo_type="dataset",
+    token=TOKEN,
+    restart_func=restart_space
+)
+download_with_restart(
+    snapshot_download,
+    repo_id=RESULTS_REPO,
+    local_dir=EVAL_RESULTS_PATH,
+    repo_type="dataset",
+    token=TOKEN,
+    restart_func=restart_space
+)
@@ -61,9 +73,6 @@ except Exception:
 (
     finished_eval_queue_df,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-tab_keys = ["Category", "Language"]
-
-
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
@@ -71,93 +80,21 @@ with demo:
     user_state = gr.State()
     organization_state = gr.State()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-
-        def search_leaderboard(query, df):
-            if not query.strip():
-                return df
-            filtered = df[df.apply(lambda row: row.astype(str).str.contains(query, case=False).any(), axis=1)]
-            return filtered
-
-        def update_modelselector_group(groups, df):
-            """
-            groups (gr.CheckboxGroup): List of currently selected groups
-            df (DataFrame or gr.State): Current dataframe
-            """
-            print("groups:", groups)
-            if not groups:
-                return None
-
-            filtered_df = df[df["Group"].isin(groups)]
-            models = filtered_df["Model Name"].unique().tolist()
-
-            return models
-
-        def update_columnselector_group(columns, groups, df):
-            print("column groups:", groups)
-
-            columns = [c for c in columns if c in df.columns[:3]]
-
-            columns.extend(df.columns[3:])
-
-            print(columns)
-
-            return columns
-
-
-        def update_leaderboard(models, columns, df):
-            print("models:", models)
-            print("columns:", columns)
-
-            filtered_df = df[df["Model Name"].isin(models)]
-            filtered_columns = [c for c in df.columns if c in columns or c in ["Model Name"]]
-            filtered_df = filtered_df[filtered_columns]
-
-            for col in filtered_df.select_dtypes(include="number").columns:
-                filtered_df[col] = filtered_df[col].round(3)
-
-            return filtered_df
-
-        def get_models_by_group(df, groups):
-            return df[df["Group"].isin(groups)]["Model Name"].tolist()
-
-        for _, key in enumerate(tab_keys):
+        for _, key in enumerate(TAB_KEYS):
             if key == "Category":
-                tab_name = "Leaderboard 1"
+                df = get_dataframe_category()
+                column_selector_value = configs.ON_LOAD_COLUMNS_CATEGORY[3:]
             else:
-                tab_name = "Leaderboard 2"
-            with gr.TabItem(tab_name, visible=True):
-                if key == "Category":
-                    df = get_dataframe_category()
-                else:
-                    df = get_dataframe_language()
-                df_state = gr.State(df)
-
-                with gr.Row():
-                    with gr.Column():
-                        search_box = gr.Textbox(label="Search Model by Name")
-                        group_list = df["Group"].unique().tolist()
-                        group_selector = gr.CheckboxGroup(choices=df["Group"].unique().tolist(), value=group_list, label="Select Model Group")
-
-                        if key == "Category":
-                            column_selector = gr.CheckboxGroup(choices=df.columns.tolist()[3:], value=configs.ON_LOAD_COLUMNS_CATEGORY[3:], label="Select Columns")
-                        else:
-                            column_selector = gr.CheckboxGroup(choices=df.columns.tolist()[3:], value=configs.ON_LOAD_COLUMNS_LANG[3:], label="Select Columns")
-
-                    with gr.Column():
-                        with gr.Accordion("Model List", open=False):
-                            model_group = df["Model Name"].tolist()
-                            model_selector = gr.CheckboxGroup(choices=df["Model Name"].tolist(), value=model_group, label="Select Models")
-
-                ld = gr.DataFrame(
-                    value=df.round(3)
-                )
-
-                # Define change functions for user interaction
-                search_box.change(fn=search_leaderboard, inputs=[search_box, df_state], outputs=ld)
-                group_selector.change(fn=update_modelselector_group, inputs=[group_selector, df_state], outputs=model_selector)
-                model_selector.change(fn=update_leaderboard, inputs=[model_selector, column_selector, df_state], outputs=ld)
-                column_selector.change(fn=update_leaderboard, inputs=[model_selector, column_selector, df_state], outputs=ld)
-
+                df = get_dataframe_language()
+                column_selector_value = configs.ON_LOAD_COLUMNS_LANG[3:]
+            create_leaderboard_tab(
+                df,
+                key,
+                search_leaderboard,
+                update_modelselector_group,
+                update_leaderboard,
+                column_selector_value
+            )
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
@@ -175,10 +112,10 @@ with demo:
             with gr.Row():
                 with gr.Column():
                     benchmark_type = gr.Dropdown(
-                        choices=["ProductivityBench"],
+                        choices=["TRUEBench v0.1"],
                         label="The name of the benchmark to be evaluated",
                         multiselect=False,
-                        value="ProductivityBench",
+                        value="TRUEBench v0.1",
                         interactive=True,
                     )
                     model_name_textbox = gr.Textbox(label="Model name")
@@ -192,7 +129,7 @@ with demo:
                     )
                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
                     vllm_version_type = gr.Dropdown(
-                        choices=["v0.9.2", "v0.9.2rc2", "v0.9.2rc1", "v0.9.1", "v0.9.1rc2", "v0.9.1rc1", "v0.9.0.1", "v0.9.0", "v0.8.5", "v0.8.5.post1", "v0.8.4", "v0.8.3", "v0.8.3rc1", "v0.8.2", "v0.8.1", "v0.8.0", "v0.8.0rc2", "v0.8.0rc1", "v0.7.3", "v0.7.2", "v0.7.1", "v0.6.6", "v0.6.6.post1", "v0.6.5", "v0.6.4.post1", "v0.6.4", "v0.6.3.post1", "v0.6.2", "v0.6.1", "v0.6.1.post2", "v0.6.1.post1", "v0.6.0"],
+                        choices=VLLM_VERSIONS,
                         label="vLLM version",
                         multiselect=False,
                         value="v0.9.2",
@@ -239,10 +176,10 @@ with demo:
             with gr.Row():
                 with gr.Column():
                     benchmark_type2 = gr.Dropdown(
-                        choices=["ProductivityBench"],
+                        choices=["TRUEBench v0.1"],
                         label="The name of the benchmark to be evaluated",
                         multiselect=False,
-                        value="ProductivityBench",
+                        value="TRUEBench v0.1",
                        interactive=True,
                     )
                     model_name_textbox2 = gr.Textbox(label="Model name")
@@ -313,7 +250,8 @@ with demo:
         show_copy_button=True,
     )
 
+
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
constants.py ADDED
@@ -0,0 +1,36 @@
+# constants.py
+
+TAB_KEYS = ["Category", "Language"]
+
+TAB_NAMES = {
+    "Category": "TRUEBench v0.1 (Category 🔧)",
+    "Language": "TRUEBench v0.1 (Language 🌎)"
+}
+
+VLLM_VERSIONS = [
+    "v0.9.2", "v0.9.2rc2", "v0.9.2rc1", "v0.9.1", "v0.9.1rc2", "v0.9.1rc1",
+    "v0.9.0.1", "v0.9.0", "v0.8.5", "v0.8.5.post1", "v0.8.4", "v0.8.3",
+    "v0.8.3rc1", "v0.8.2", "v0.8.1", "v0.8.0", "v0.8.0rc2", "v0.8.0rc1",
+    "v0.7.3", "v0.7.2", "v0.7.1", "v0.6.6", "v0.6.6.post1", "v0.6.5",
+    "v0.6.4.post1", "v0.6.4", "v0.6.3.post1", "v0.6.2", "v0.6.1",
+    "v0.6.1.post2", "v0.6.1.post1", "v0.6.0"
+]
+
+# Required leaderboard columns (must always be included)
+LEADERBOARD_REQUIRED_COLUMNS = [
+    "Model Name", "Group", "Overall", "Model Type", "Output Form", "Rank"
+]
+
+# Model badge mappings (centralized for both UI and backend)
+MODEL_TYPE_MAP = {
+    "deepseek_r1": "open",
+    "deepseek_r1_0528": "open",
+    "Qwen3-32B": "open",
+    "Gauss2.3-Think-250708": "closed"
+}
+OUTPUT_FORM_MAP = {
+    "deepseek_r1": "reasoning",
+    "deepseek_r1_0528": "normal",
+    "Qwen3-32B": "reasoning",
+    "Gauss2.3-Think-250708": "reasoning"
+}
handlers.py ADDED
@@ -0,0 +1,86 @@
+import pandas as pd
+
+from constants import LEADERBOARD_REQUIRED_COLUMNS, MODEL_TYPE_MAP, OUTPUT_FORM_MAP
+
+def search_leaderboard(query, df, sort_col=None, sort_asc=True):
+    if not query.strip():
+        filtered = df
+    else:
+        filtered = df[df.apply(lambda row: row.astype(str).str.contains(query, case=False).any(), axis=1)]
+    if sort_col and sort_col in filtered.columns:
+        filtered = filtered.sort_values(sort_col, ascending=sort_asc).reset_index(drop=True)
+    return filtered
+
+def update_modelselector_group(groups, df):
+    """
+    groups (gr.CheckboxGroup): List of currently selected groups
+    df (DataFrame or gr.State): Current dataframe
+    """
+    print("groups:", groups)
+    if not groups:
+        return None
+
+    filtered_df = df[df["Group"].isin(groups)]
+    models = filtered_df["Model Name"].unique().tolist()
+
+    return models
+
+def update_columnselector_group(columns, groups, df):
+    print("column groups:", groups)
+
+    columns = [c for c in columns if c in df.columns[:3]]
+    columns.extend(df.columns[3:])
+
+    print(columns)
+    return columns
+
+def update_leaderboard(models, columns, df, sort_col=None, sort_asc=True):
+    print("models:", models)
+    print("columns:", columns)
+    print("sort_col:", sort_col, "sort_asc:", sort_asc)
+
+    # Always include the required columns
+    columns = list(dict.fromkeys(LEADERBOARD_REQUIRED_COLUMNS + list(columns)))
+
+    # Always include the columns needed for badge/rank rendering
+    always_include = ["Model Name", "Model Type", "Output Form", "Rank"]
+    filtered_df = df[df["Model Name"].isin(models)].copy()
+
+    # Create the Model Type, Output Form, and Rank columns if they are missing
+    if "Model Type" not in filtered_df.columns:
+        filtered_df["Model Type"] = filtered_df["Model Name"].map(MODEL_TYPE_MAP).fillna("open")
+    if "Output Form" not in filtered_df.columns:
+        filtered_df["Output Form"] = filtered_df["Model Name"].map(OUTPUT_FORM_MAP).fillna("normal")
+    if "Rank" not in filtered_df.columns:
+        # Sort key: sort_col if given, otherwise Overall
+        rank_col = sort_col if sort_col and sort_col in filtered_df.columns else ("Overall" if "Overall" in filtered_df.columns else None)
+        if rank_col:
+            filtered_df = filtered_df.sort_values(rank_col, ascending=not sort_asc).reset_index(drop=True)
+            filtered_df["Rank"] = filtered_df.index + 1
+        else:
+            filtered_df["Rank"] = range(1, len(filtered_df) + 1)
+
+    # always_include columns are kept unconditionally
+    filtered_columns = [c for c in df.columns if c in columns or c in always_include]
+    for col in always_include:
+        if col not in filtered_columns:
+            filtered_columns.append(col)
+
+    # Deduplicate while preserving order
+    filtered_columns = list(dict.fromkeys(filtered_columns))
+    filtered_df = filtered_df[filtered_columns]
+
+    for col in filtered_df.select_dtypes(include="number").columns:
+        filtered_df[col] = filtered_df[col].round(3)
+
+    if sort_col and sort_col in filtered_df.columns:
+        filtered_df = filtered_df.sort_values(sort_col, ascending=sort_asc).reset_index(drop=True)
+        # Recompute Rank after sorting
+        filtered_df["Rank"] = filtered_df.index + 1
+
+    return filtered_df
+
+def get_models_by_group(df, groups):
+    return df[df["Group"].isin(groups)]["Model Name"].tolist()
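A quick usage sketch for `update_leaderboard` (toy data; the real frames come from `get_dataframe_category`/`get_dataframe_language`). The required columns ("Model Type", "Output Form", "Rank") are injected automatically, and `Rank` is derived from "Overall" when no sort column is given:

```python
# Toy data following the leaderboard schema; scores are made up.
import pandas as pd
from handlers import update_leaderboard

df = pd.DataFrame({
    "Model Name": ["Qwen3-32B", "deepseek_r1"],
    "Group": ["G1", "G2"],
    "Overall": [0.812, 0.779],
    "Reasoning": [0.9, 0.7],
})

out = update_leaderboard(
    models=["Qwen3-32B", "deepseek_r1"],
    columns=["Reasoning"],  # required columns are added on top of this
    df=df,
)
print(out)  # includes Model Type / Output Form badges and a computed Rank
```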
src/about.py CHANGED
@@ -21,13 +21,13 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">🥇 Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">🥇 Test Space</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Leaderboards for LLM evaluation.
 
-Our benchmark is designed to evaluate LLMs for Productivity Assistants which stand for human's job productivity.
+*TRUEBench (Trustworthy Real-world Usage Evaluation Benchmark)* is designed to evaluate LLMs as productivity assistants that support people's work.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
@@ -87,7 +87,7 @@ EVALUATION_QUEUE_TEXT_OPTION2 = """
 
 EVALUATION_QUEUE_TEXT_OPTION3 = """
 # (Option 3) Pull Request
-If Options 1 & 2 are unavailable, make a [PR](https://huggingface.co/spaces/Jongyoon-Song/test_space/discussions?new_pr=true) with the [ADD_MODEL] prefix and contents as follows:
+If Options 1 & 2 are unavailable, make a [PR](https://huggingface.co/spaces/coms1580/test_space/discussions?new_pr=true) with the [ADD_MODEL] prefix and contents as follows:
 
 ```
 ### Open-weight models:
src/config.py CHANGED
@@ -2,34 +2,34 @@ ON_LOAD_COLUMNS_LANG = [
     "Model Name",
     "Group",
     "Overall",
-    "L1",
-    "L2",
-    "L3",
-    "L4",
-    "L5",
-    "L6",
-    "L7",
-    "L8",
-    "L9",
-    "L10",
-    "L11",
-    "L12"
+    "KO",
+    "EN",
+    "JA",
+    "ZH",
+    "PL",
+    "DE",
+    "PT",
+    "ES",
+    "FR",
+    "IT",
+    "RU",
+    "VI"
 ]
 
 ON_LOAD_COLUMNS_CATEGORY = [
     "Model Name",
     "Group",
     "Overall",
-    "C1",
-    "C2",
-    "C3",
-    "C4",
-    "C5",
-    "C6",
-    "C7",
-    "C8",
-    "C9",
-    "C10"
+    "Content Generation",
+    "Editing",
+    "Data Analysis",
+    "Reasoning",
+    "Hallucination",
+    "Safety",
+    "Repetition",
+    "Summarization",
+    "Translation",
+    "Multi-Turn"
 ]
 
 COLUMN_GROUP_LIST = [
src/data/export_category_250618.csv DELETED
@@ -1,3 +0,0 @@
-"Model Name" "Group" "Overall" "C1" "C2" "C3" "C4" "C5" "C6" "C7" "C8" "C9" "C10"
-"M1" "G1" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
-"M2" "G2" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
src/data/export_category_250709.csv ADDED
Binary file (1.26 kB)
src/data/export_lang_250618.csv DELETED
@@ -1,3 +0,0 @@
-"Model Name" "Group" "Overall" "L1" "L2" "L3" "L4" "L5" "L6" "L7" "L8" "L9" "L10" "L11" "L12"
-"M1" "G1" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
-"M2" "G2" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00" "0.00"
src/data/export_lang_250709.csv ADDED
Binary file (958 Bytes)
src/data_utils.py CHANGED
@@ -3,12 +3,12 @@ from pathlib import Path
 
 def get_dataframe_category():
     abs_path = Path(__file__).parent
-    df = pd.read_csv(str(abs_path / "data/export_category_250618.csv"), encoding='utf-8', delimiter=" ")
+    df = pd.read_csv(str(abs_path / "data/export_category_250709.csv"), encoding='utf-16', delimiter=" ")
     df = df.sort_values("Overall", ascending=False)
     return df
 
 def get_dataframe_language():
     abs_path = Path(__file__).parent
-    df = pd.read_csv(str(abs_path / "data/export_lang_250618.csv"), encoding='utf-8', delimiter=" ")
+    df = pd.read_csv(str(abs_path / "data/export_lang_250709.csv"), encoding='utf-16', delimiter=" ")
     df = df.sort_values("Overall", ascending=False)
    return df
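The new exports are UTF-16, which starts with a BOM and contains NUL bytes; that is why the Hub viewer above renders `export_*_250709.csv` as binary blobs. A minimal round-trip sketch with toy data (pandas quotes fields that contain the space delimiter, matching the quoted headers in the old CSVs):

```python
# Sketch: round-tripping a space-delimited, UTF-16 CSV like the new exports.
import pandas as pd

df = pd.DataFrame({"Model Name": ["M1"], "Group": ["G1"], "Overall": [0.5]})
df.to_csv("export_demo.csv", sep=" ", index=False, encoding="utf-16")

back = pd.read_csv("export_demo.csv", encoding="utf-16", delimiter=" ")
print(back)
```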
src/display/css_html_js.py CHANGED
@@ -1,5 +1,128 @@
 custom_css = """
 
+/* Sort arrow/button styles */
+.sort-arrow, .sort-btn {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    background: #23244a;
+    color: #ffd700 !important;  /* always yellow */
+    border: 1.5px solid #ffd700;  /* gold border */
+    border-radius: 6px;
+    font-size: 15px;
+    font-weight: 700;
+    margin-left: 6px;
+    margin-right: 2px;
+    padding: 2px 8px 2px 6px;
+    cursor: pointer;
+    transition: background 0.2s, color 0.2s, border 0.2s;
+    min-width: 28px;
+    min-height: 28px;
+    outline: none;
+}
+.sort-arrow.active, .sort-btn.active {
+    color: #ffd700 !important;  /* gold */
+    border-color: #ffd700;
+    background: #1a237e;
+}
+.sort-arrow:hover, .sort-btn:hover {
+    background: #ffd700;
+    color: #23244a !important;
+    border-color: #ffd700;
+}
+.sort-arrow svg, .sort-btn svg {
+    margin-left: 2px;
+    margin-right: 0;
+    width: 1em;
+    height: 1em;
+    vertical-align: middle;
+}
+
+/* Enhanced leaderboard table styles */
+.pretty-leaderboard-table {
+    width: 100%;
+    border-collapse: separate;
+    border-spacing: 0;
+    background: rgba(30, 34, 54, 0.98);
+    border-radius: 16px;
+    box-shadow: 0 4px 24px 0 rgba(16, 152, 247, 0.10), 0 1.5px 6px 0 rgba(227, 84, 84, 0.08);
+    overflow: hidden;
+    margin-bottom: 24px;
+}
+.pretty-leaderboard-table th, .pretty-leaderboard-table td {
+    padding: 12px 16px;
+    text-align: left;
+    border-bottom: 1px solid #23244a;
+    font-size: 15px;
+}
+.pretty-leaderboard-table th {
+    background: linear-gradient(90deg, #23244a 0%, #1a237e 100%);
+    color: #F5F6F7;
+    font-weight: 700;
+    letter-spacing: 0.5px;
+    border-bottom: 2px solid #1098F7;
+}
+.pretty-leaderboard-table tr:nth-child(even) {
+    background: rgba(245, 246, 247, 0.03);
+}
+.pretty-leaderboard-table tr:hover {
+    background: rgba(16, 152, 247, 0.08);
+    transition: background 0.2s;
+}
+.pretty-leaderboard-table td {
+    color: #F5F6F7;
+    vertical-align: middle;
+}
+.pretty-leaderboard-table tr:last-child td {
+    border-bottom: none;
+}
+.pretty-leaderboard-table th:first-child, .pretty-leaderboard-table td:first-child {
+    border-top-left-radius: 16px;
+}
+.pretty-leaderboard-table th:last-child, .pretty-leaderboard-table td:last-child {
+    border-top-right-radius: 16px;
+}
+
+/* Enhanced score bar styles */
+.score-bar {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    width: 100%;
+}
+.score-bar-track {
+    flex-grow: 1;
+    height: 10px;
+    background: rgba(245, 246, 247, 0.12);
+    border-radius: 5px;
+    overflow: hidden;
+    max-width: 220px;
+    box-shadow: 0 1px 4px 0 rgba(16, 152, 247, 0.10);
+}
+.score-bar-fill {
+    height: 100%;
+    background: linear-gradient(90deg, #E35454, #1098F7);
+    border-radius: 5px;
+    transition: width 0.3s cubic-bezier(0.4,0,0.2,1);
+}
+.score-bar-value {
+    font-family: 'SF Mono', monospace;
+    font-weight: 600;
+    color: #F5F6F7;
+    min-width: 60px;
+    font-size: 14px;
+}
+
+body {
+    min-height: 100vh;
+    background: linear-gradient(135deg, #1a237e 0%, #311b92 100%);
+    background-image:
+        radial-gradient(rgba(255,255,255,0.12) 1.2px, transparent 1.2px),
+        radial-gradient(rgba(255,255,255,0.08) 1px, transparent 1px);
+    background-size: 40px 40px, 80px 80px;
+    background-position: 0 0, 20px 20px;
+}
+
 .markdown-text {
     font-size: 16px !important;
 }
@@ -22,7 +145,15 @@ custom_css = """
 }
 
 #leaderboard-table {
-    margin-top: 15px
+    margin-top: 15px;
+    /* Space-themed background */
+    background: linear-gradient(135deg, #1a237e 0%, #311b92 100%);
+    position: relative;
+    background-image:
+        radial-gradient(rgba(255,255,255,0.15) 1.2px, transparent 1.2px),
+        radial-gradient(rgba(255,255,255,0.10) 1px, transparent 1px);
+    background-size: 40px 40px, 80px 80px;
+    background-position: 0 0, 20px 20px;
 }
 
 #leaderboard-table-lite {
@@ -94,6 +225,53 @@ custom_css = """
 #box-filter > .form{
     border: 0
 }
+
+/* Model type and output form badge styles */
+.badge {
+    display: inline-block;
+    border-radius: 12px;
+    padding: 2px 10px;
+    font-size: 0.85em;
+    font-weight: 700;
+    margin-left: 6px;
+    box-shadow: 0 1px 4px rgba(0,0,0,0.10);
+    vertical-align: middle;
+}
+.badge-open {
+    background: linear-gradient(90deg, #2196f3, #21cbf3);
+    color: #fff;
+}
+.badge-closed {
+    background: linear-gradient(90deg, #757575, #bdbdbd);
+    color: #fff;
+}
+.badge-normal {
+    background: linear-gradient(90deg, #43a047, #66bb6a);
+    color: #fff;
+}
+.badge-reasoning {
+    background: linear-gradient(90deg, #8e24aa, #d500f9);
+    color: #fff;
+}
+
+/* Sort button styles */
+.sort-btn {
+    background: #23244a;
+    color: #F5F6F7;
+    border: 1px solid #1098F7;
+    border-radius: 6px;
+    font-size: 13px;
+    font-weight: 700;
+    margin-left: 4px;
+    margin-right: 2px;
+    padding: 2px 7px;
+    cursor: pointer;
+    transition: background 0.2s, color 0.2s;
+}
+.sort-btn:hover {
+    background: #1098F7;
+    color: #fff;
+}
 """
 
 get_window_url_params = """
@@ -103,3 +281,105 @@ get_window_url_params = """
     return url_params;
 }
 """
+
+def get_rank_badge(rank: int) -> str:
+    """
+    Returns HTML for a rank badge (1st, 2nd, 3rd) with appropriate styling.
+    """
+    badge_styles = {
+        1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
+        2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
+        3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
+    }
+    if rank in badge_styles:
+        label, gradient, text_color = badge_styles[rank]
+        return f'''
+        <div style="
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            min-width: 48px;
+            padding: 4px 12px;
+            background: {gradient};
+            color: {text_color};
+            border-radius: 6px;
+            font-weight: 700;
+            font-size: 1em;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.18);
+            border: 1.5px solid #fff2;
+        ">
+            {label}
+        </div>
+        '''
+    return f'''
+    <div style="
+        display: inline-flex;
+        align-items: center;
+        justify-content: center;
+        min-width: 28px;
+        color: #a1a1aa;
+        font-weight: 500;
+    ">
+        {rank}
+    </div>
+    '''
+
+def get_score_gauge(score: float, max_score: float = 1.0) -> str:
+    """
+    Returns HTML for an overall score gauge (progress bar style).
+    """
+    percent = min(max(score / max_score, 0), 1) * 100
+    return f'''
+    <div class="score-bar" style="margin: 0.5em 0;">
+        <div class="score-bar-track">
+            <div class="score-bar-fill" style="width: {percent}%;"></div>
+        </div>
+        <span class="score-bar-value">{score:.3f}</span>
+    </div>
+    '''
+
+def get_leaderboard_table_html(df) -> str:
+    """
+    Returns HTML for a pretty leaderboard table using badge and gauge.
+    Expects df to have columns: 'Model', 'Score', 'Model Type', 'Output Form'.
+    """
+    def get_type_badge(model_type):
+        if model_type == "open":
+            return '<span class="badge badge-open">open</span>'
+        else:
+            return '<span class="badge badge-closed">closed</span>'
+
+    def get_output_badge(output_form):
+        if output_form == "reasoning":
+            return '<span class="badge badge-reasoning">reasoning</span>'
+        else:
+            return '<span class="badge badge-normal">normal</span>'
+
+    html = ['<table class="pretty-leaderboard-table">']
+    # Header
+    html.append(
+        "<thead><tr>"
+        "<th>Rank</th>"
+        "<th>Model</th>"
+        "<th>Overall Score</th>"
+        "</tr></thead>"
+    )
+    html.append("<tbody>")
+    for idx, row in enumerate(df.itertuples(index=False), 1):
+        model = getattr(row, "Model", "")
+        score = getattr(row, "Score", 0.0)
+        model_type = getattr(row, "Model_Type", getattr(row, "Model Type", "open"))
+        output_form = getattr(row, "Output_Form", getattr(row, "Output Form", "normal"))
+        badge = get_rank_badge(idx)
+        gauge = get_score_gauge(score)
+        type_badge = get_type_badge(model_type)
+        output_badge = get_output_badge(output_form)
+        html.append(
+            f"<tr>"
+            f"<td>{badge}</td>"
+            f"<td>{model} {type_badge} {output_badge}</td>"
+            f"<td>{gauge}</td>"
+            f"</tr>"
+        )
+    html.append("</tbody></table>")
+    return "\n".join(html)
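A usage sketch for `get_leaderboard_table_html` with toy data. One caveat worth flagging: `itertuples(index=False)` renames columns whose names contain spaces to positional `_n` attributes, so the `getattr(row, "Model Type", ...)` fallback will usually land on its default; underscore-named columns sidestep that:

```python
import pandas as pd
from src.display.css_html_js import get_leaderboard_table_html

df = pd.DataFrame({
    "Model": ["Qwen3-32B", "deepseek_r1"],
    "Score": [0.81, 0.78],
    "Model_Type": ["open", "open"],          # underscore names survive itertuples
    "Output_Form": ["reasoning", "reasoning"],
})
html = get_leaderboard_table_html(df)  # rank badges + score gauges + type badges
print(html[:80])
```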
src/display/formatting.py CHANGED
@@ -25,3 +25,128 @@ def has_no_nan_values(df, columns):
 
 def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
+
+def get_score_bar(score):
+    """
+    Generate HTML for a score bar with gradient styling.
+    Expects score in the range 0-100.
+    """
+    width = max(0, min(score, 100))  # Clamp to [0, 100]
+    return f"""
+    <div class="score-bar">
+        <div class="score-bar-track">
+            <div class="score-bar-fill" style="width: {width}%;"></div>
+        </div>
+        <span class="score-bar-value">{score:.3f}</span>
+    </div>
+    """
+
+def render_leaderboard_html(df, overall_col="average"):
+    """
+    Render a DataFrame as an HTML table, replacing the overall_col with a gauge bar.
+    """
+    from src.display.css_html_js import get_rank_badge
+
+    def get_type_badge(model_type):
+        if model_type == "open":
+            return '<span class="badge badge-open">open</span>'
+        else:
+            return '<span class="badge badge-closed">closed</span>'
+
+    def get_output_badge(output_form):
+        if output_form == "reasoning":
+            return '<span class="badge badge-reasoning">reasoning</span>'
+        else:
+            return '<span class="badge badge-normal">normal</span>'
+
+    # Columns to hide from the rendered table
+    hidden_cols = ["Model", "Model Type", "Output Form", "Rank"]
+
+    # Build table header
+    def get_sort_arrow(col, sort_col, sort_asc):
+        # Expose a sort button on every column except "Model Name" and "Group"
+        if col in {"Model Name", "Group"}:
+            return ""
+        # Only one button (▲ or ▼) is shown; clicking it flips asc
+        if col == sort_col:
+            # Flip the icon and data-asc according to the current sort state
+            if sort_asc:
+                # Currently ascending: ▼ icon, clicking sorts descending
+                svg = (
+                    '<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
+                    '<polygon points="3,5 11,5 7,11" fill="currentColor"/></svg>'
+                )
+                return (
+                    f'<span class="sort-arrow active" data-col="{col}" data-asc="false" aria-label="Sort descending">{svg}</span>'
+                )
+            else:
+                # Currently descending: ▲ icon, clicking sorts ascending
+                svg = (
+                    '<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
+                    '<polygon points="7,3 11,9 3,9" fill="currentColor"/></svg>'
+                )
+                return (
+                    f'<span class="sort-arrow active" data-col="{col}" data-asc="true" aria-label="Sort ascending">{svg}</span>'
+                )
+        else:
+            # Column not being sorted: ▲ (ascending) icon, clicking sorts ascending
+            svg = (
+                '<svg width="14" height="14" viewBox="0 0 14 14" style="vertical-align:middle">'
+                '<polygon points="7,3 11,9 3,9" fill="currentColor"/></svg>'
+            )
+            return (
+                f'<span class="sort-arrow" data-col="{col}" data-asc="true" aria-label="Sort ascending">{svg}</span>'
+            )
+
+    # Extract the sort state (passed via State, or defaults)
+    sort_col = getattr(df, "_sort_col", None) or (df.columns[0] if len(df.columns) > 0 else None)
+    sort_asc = getattr(df, "_sort_asc", None)
+    if sort_asc is None:
+        sort_asc = True
+
+    html = '<table class="pretty-leaderboard-table">\n<thead><tr>'
+    for col in df.columns:
+        if col in hidden_cols:
+            continue
+        html += f'<th>{col}{get_sort_arrow(col, sort_col, sort_asc)}</th>'
+    html += '</tr></thead>\n<tbody>\n'
+
+    # Build table rows
+    for idx, row in df.iterrows():
+        html += '<tr>'
+        for col in df.columns:
+            if col in hidden_cols:
+                continue
+            cell = row[col]
+            if col == overall_col:
+                try:
+                    cell_html = get_score_bar(float(cell))
+                except Exception:
+                    cell_html = str(cell)
+                html += f'<td>{cell_html}</td>'
+            elif col in ["Model Name"]:
+                # Highlight ranks 1-3, white from 4th place on, badges always shown
+                rank = row.get("Rank", None)
+                model_type = row.get("Model Type", None) or row.get("Model_Type", None)
+                output_form = row.get("Output Form", None) or row.get("Output_Form", None)
+                if rank == 1 or rank == "1":
+                    highlight_style = "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;"
+                elif rank == 2 or rank == "2":
+                    highlight_style = "color: #b0b0b0; font-weight: bold;"
+                elif rank == 3 or rank == "3":
+                    highlight_style = "color: #cd7f32; font-weight: bold;"
+                else:
+                    highlight_style = "color: #fff; font-weight: 600;"
+                badge_html = ""
+                if model_type:
+                    badge_html += " " + get_type_badge(model_type)
+                if output_form:
+                    badge_html += " " + get_output_badge(output_form)
+                html += f'<td><span style="{highlight_style}">{cell}</span>{badge_html}</td>'
+            else:
+                html += f'<td>{cell}</td>'
+        html += '</tr>\n'
+    html += '</tbody></table>'
+    return html
@@ -21,24 +21,26 @@ class ColumnContent:
21
  never_hidden: bool = False
22
 
23
  ## Leaderboard columns
 
 
24
  auto_eval_column_dict = []
25
  # Init
26
- auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
27
- auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
28
- #Scores
29
- auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average โฌ†๏ธ", "number", True)])
30
  for task in Tasks:
31
- auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
32
  # Model information
33
- auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
34
- auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
35
- auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
36
- auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
37
- auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
38
- auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
39
- auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub โค๏ธ", "number", False)])
40
- auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
41
- auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
42
 
43
  # We use make dataclass to dynamically fill the scores from Tasks
44
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -113,4 +115,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
113
  EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
114
 
115
  BENCHMARK_COLS = [t.value.col_name for t in Tasks]
116
-
 
21
  never_hidden: bool = False
22
 
23
  ## Leaderboard columns
24
+ from dataclasses import field
25
+
26
  auto_eval_column_dict = []
27
  # Init
28
+ auto_eval_column_dict.append(["model_type_symbol", ColumnContent, field(default_factory=lambda: ColumnContent("T", "str", True, never_hidden=True))])
29
+ auto_eval_column_dict.append(["model", ColumnContent, field(default_factory=lambda: ColumnContent("Model", "markdown", True, never_hidden=True))])
30
+ # Scores
31
+ auto_eval_column_dict.append(["average", ColumnContent, field(default_factory=lambda: ColumnContent("Average โฌ†๏ธ", "number", True))])
32
  for task in Tasks:
33
+ auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=task: ColumnContent(t.value.col_name, "number", True))])
34
  # Model information
35
+ auto_eval_column_dict.append(["model_type", ColumnContent, field(default_factory=lambda: ColumnContent("Type", "str", False))])
36
+ auto_eval_column_dict.append(["architecture", ColumnContent, field(default_factory=lambda: ColumnContent("Architecture", "str", False))])
37
+ auto_eval_column_dict.append(["weight_type", ColumnContent, field(default_factory=lambda: ColumnContent("Weight type", "str", False, True))])
38
+ auto_eval_column_dict.append(["precision", ColumnContent, field(default_factory=lambda: ColumnContent("Precision", "str", False))])
39
+ auto_eval_column_dict.append(["license", ColumnContent, field(default_factory=lambda: ColumnContent("Hub License", "str", False))])
40
+ auto_eval_column_dict.append(["params", ColumnContent, field(default_factory=lambda: ColumnContent("#Params (B)", "number", False))])
41
+ auto_eval_column_dict.append(["likes", ColumnContent, field(default_factory=lambda: ColumnContent("Hub โค๏ธ", "number", False))])
42
+ auto_eval_column_dict.append(["still_on_hub", ColumnContent, field(default_factory=lambda: ColumnContent("Available on the hub", "bool", False))])
43
+ auto_eval_column_dict.append(["revision", ColumnContent, field(default_factory=lambda: ColumnContent("Model sha", "str", False, False))])
44
 
45
  # We use make dataclass to dynamically fill the scores from Tasks
46
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
115
  EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
116
 
117
  BENCHMARK_COLS = [t.value.col_name for t in Tasks]
 
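The switch to `field(default_factory=...)` is what lets `make_dataclass` accept `ColumnContent` defaults at all: a non-frozen dataclass with `eq=True` is unhashable, and the dataclasses machinery rejects unhashable default values. Binding the loop variable as a default argument (`lambda t=task: ...`) also avoids the late-binding closure bug inside the `for task in Tasks` loop. A minimal reproduction:

```python
# Why the default_factory change is needed: a minimal reproduction.
from dataclasses import dataclass, field, make_dataclass

@dataclass
class Content:
    name: str = ""

try:
    # A Content instance is unhashable (eq=True, frozen=False), so this raises.
    Bad = make_dataclass("Bad", [("col", Content, Content("Model"))])
except ValueError as e:
    print(e)  # mutable default <class '...Content'> for field col is not allowed

# default_factory defers construction to instantiation time, which is allowed.
Good = make_dataclass("Good", [("col", Content, field(default_factory=lambda: Content("Model")))])
print(Good().col)  # Content(name='Model')
```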
src/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
 # ----------------------------------
 TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
 
-OWNER = "Jongyoon-Song" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "coms1580" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/test_space"
src/submission/check_validity.py CHANGED
@@ -95,6 +95,6 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
         if info["model"].count("/") == 0 or "submitted_time" not in info:
             continue
         organisation, _ = info["model"].split("/")
-        users_to_submission_dates[organisation].extend([{"benchmark": info['benchmark'], "submitted_time": info["submitted_time"]}])
+        users_to_submission_dates[organisation].extend([{"benchmark": info['benchmark'], "model": info["model"], "submitted_time": info["submitted_time"]}])
 
     return set(file_names), users_to_submission_dates
src/submission/submit.py CHANGED
@@ -56,7 +56,7 @@ def add_new_eval_option1(
         hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
         if hours_diff <= 24:
             submission_cnt += 1
-    if submission_cnt >= 3:
+    if submission_cnt > 3:
         return styled_error("The organization already submitted three times for this benchmark today.")
 
     # Does the model actually exist?
@@ -127,8 +127,14 @@ def add_new_eval_option1(
     }
 
     # Check for duplicate submission
-    if f"{benchmark}_{model}" in REQUESTED_MODELS:
-        return styled_warning("This model has been already submitted.")
+    submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark and item['model'] == model]
+    submission_cnt = 0
+    for i in range(len(submission_times)):
+        hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
+        if hours_diff <= 24:
+            submission_cnt += 1
+    if submission_cnt > 1:
+        return styled_warning("This model has been already submitted within 24 hours.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
@@ -193,14 +199,13 @@ def add_new_eval_option2(
         return styled_error("The submitter does not have submission rights for this model.")
 
     # Does the organization submit more than three times in a day?
-    print(USERS_TO_SUBMISSION_DATES)
     submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark]
     submission_cnt = 0
     for i in range(len(submission_times)):
         hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
         if hours_diff <= 24:
             submission_cnt += 1
-    if submission_cnt >= 3:
+    if submission_cnt > 3:
         return styled_error("The organization already submitted three times for this benchmark today.")
 
     # Does the model actually exist?
@@ -271,8 +276,14 @@ def add_new_eval_option2(
     }
 
     # Check for duplicate submission
-    if f"{benchmark}_{model}" in REQUESTED_MODELS:
-        return styled_warning("This model has been already submitted.")
+    submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES[user_name] if item['benchmark'] == benchmark and item['model'] == model]
+    submission_cnt = 0
+    for i in range(len(submission_times)):
+        hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submission_times[i], "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
+        if hours_diff <= 24:
+            submission_cnt += 1
+    if submission_cnt > 1:
+        return styled_warning("This model has been already submitted within 24 hours.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
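Both duplicate checks now count prior submissions inside a rolling 24-hour window (using the `model` field newly recorded by `check_validity.py`) instead of consulting `REQUESTED_MODELS`. Note the relaxed comparison: with `submission_cnt > 3` the guard only fires once more than three earlier submissions fall in the window, so a fourth one still goes through even though the message says three. A sketch of the window arithmetic (timestamps are illustrative):

```python
# Rolling-24h window check, as used above.
from datetime import datetime, timedelta, timezone

FMT = "%Y-%m-%dT%H:%M:%S %z"
now = datetime.now(timezone.utc)
submission_times = [(now - timedelta(hours=h)).strftime(FMT) for h in (1, 5, 30)]

recent = sum(
    (now - datetime.strptime(t, FMT)).total_seconds() / 3600 <= 24
    for t in submission_times
)
print(recent)      # 2 -> the 30h-old entry falls outside the window
print(recent > 3)  # False: this submission would still be accepted
```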
ui.py ADDED
@@ -0,0 +1,228 @@
+import gradio as gr
+import src.config as configs
+from constants import TAB_NAMES, MODEL_TYPE_MAP, OUTPUT_FORM_MAP, LEADERBOARD_REQUIRED_COLUMNS
+from src.display.formatting import render_leaderboard_html
+from src.display.css_html_js import get_leaderboard_table_html, custom_css
+import pandas as pd
+
+def render_pretty_leaderboard_html(df):
+    """
+    Renders a pretty leaderboard table using badge and gauge.
+    Supports both ['Model', 'Score'] and ['Model Name', 'Overall'] columns.
+    Sorts by score descending and rounds for display.
+    """
+    # Flexible column mapping
+    col_map = {}
+    if "Model" in df.columns:
+        col_map["Model"] = "Model"
+    elif "Model Name" in df.columns:
+        col_map["Model"] = "Model Name"
+    else:
+        return "<div style='color:red'>DataFrame must have a 'Model' or 'Model Name' column.</div>"
+    if "Score" in df.columns:
+        col_map["Score"] = "Score"
+    elif "Overall" in df.columns:
+        col_map["Score"] = "Overall"
+    else:
+        return "<div style='color:red'>DataFrame must have a 'Score' or 'Overall' column.</div>"
+
+    # Example mappings for demonstration (expand as needed)
+    model_type_map = MODEL_TYPE_MAP
+    output_form_map = OUTPUT_FORM_MAP
+
+    # Copy and rename for uniformity
+    df2 = df.copy()
+    df2 = df2.rename(columns={col_map["Model"]: "Model", col_map["Score"]: "Score"})
+
+    # Print any model names missing from the mappings (for debugging)
+    missing_type = set(df2["Model"]) - set(model_type_map.keys())
+    missing_output = set(df2["Model"]) - set(output_form_map.keys())
+    if missing_type:
+        print("Missing Model Type mappings:", missing_type)
+    if missing_output:
+        print("Missing Output Form mappings:", missing_output)
+
+    # Add badge columns
+    df2["Model Type"] = df2["Model"].map(model_type_map).fillna("open")
+    df2["Output Form"] = df2["Model"].map(output_form_map).fillna("normal")
+    # Drop NA, sort, round
+    df2 = df2[["Model", "Score", "Model Type", "Output Form"]].dropna()
+    df2["Score"] = pd.to_numeric(df2["Score"], errors="coerce").round(2)
+    df2 = df2.sort_values("Score", ascending=False).reset_index(drop=True)
+
+    return get_leaderboard_table_html(df2)
+
+def create_leaderboard_tab(df, key, search_leaderboard, update_modelselector_group, update_leaderboard, column_selector_value):
+    """
+    df: DataFrame to display
+    key: "Category" or "Language"
+    search_leaderboard, update_modelselector_group, update_leaderboard: handler functions
+    column_selector_value: default columns to select
+    """
+    with gr.TabItem(
+        TAB_NAMES[key],
+        visible=True
+    ):
+        df_state = gr.State(df)
+
+        with gr.Row():
+            with gr.Column():
+                search_box = gr.Textbox(label="Search Model by Name")
+                group_list = df["Group"].unique().tolist()
+                group_selector = gr.CheckboxGroup(
+                    choices=df["Group"].unique().tolist(),
+                    value=group_list,
+                    label="Select Model Group"
+                )
+                # Required columns are always included and cannot be unchecked
+                # Exclude "Model Name", "Group", "Overall" from the choices
+                exclude_cols = {"Model Name", "Group", "Overall"}
+                selectable_columns = [col for col in df.columns.tolist()[3:] if col not in exclude_cols]
+                all_columns = list(dict.fromkeys(LEADERBOARD_REQUIRED_COLUMNS + selectable_columns))
+                column_selector = gr.CheckboxGroup(
+                    choices=selectable_columns,
+                    value=[col for col in column_selector_value if col in selectable_columns],
+                    label="Select Columns"
+                )
+
+            with gr.Column():
+                with gr.Accordion("Model List", open=False):
+                    model_group = df["Model Name"].tolist()
+                    model_selector = gr.CheckboxGroup(
+                        choices=df["Model Name"].tolist(),
+                        value=model_group,
+                        label="Select Models"
+                    )
+
+        # Build a DataFrame with badge info (for the upper table)
+        df_badge = df.copy()
+        # Normalize the model column name
+        if "Model Name" in df_badge.columns:
+            df_badge["Model"] = df_badge["Model Name"]
+        # Example mappings (extend in step with those above)
+        model_type_map = MODEL_TYPE_MAP
+        output_form_map = OUTPUT_FORM_MAP
+        df_badge["Model Type"] = df_badge["Model"].map(model_type_map).fillna("open")
+        df_badge["Output Form"] = df_badge["Model"].map(output_form_map).fillna("normal")
+        df_badge = df_badge.sort_values("Overall" if "Overall" in df_badge.columns else "Score", ascending=False).reset_index(drop=True)
+        df_badge["Rank"] = df_badge.index + 1
+
+        # State for tracking the sort order (created once, reused afterwards)
+        default_sort_col = "Overall" if "Overall" in df_badge.columns else "Score"
+        sort_col_state = gr.State(default_sort_col)
+        sort_asc_state = gr.State(False)  # descending by default
+
+        # Sort function (uses the asc value passed from JS as-is)
+        def sort_and_render(col, asc, models, columns, df_):
+            print(f"[sort_and_render] called: col={col}, asc={asc}, models={models}, columns={columns}")
+            filtered_df = update_leaderboard(models, columns, df_, col, asc)
+            # Stash the sort state on the DataFrame so the header reflects it
+            filtered_df._sort_col = col
+            filtered_df._sort_asc = asc
+            return render_leaderboard_html(filtered_df.round(3)), col, asc
+
+        leaderboard_html = render_leaderboard_html(df_badge.round(3))
+        leaderboard_html_comp = gr.HTML(value=leaderboard_html, elem_id="leaderboard-table")
+
+        # Hidden textbox used as the sort trigger
+        sort_trigger = gr.Textbox(visible=False, elem_id="sort-leaderboard-trigger")
+
+        # Injected JS: a sort-arrow click always writes a fresh value (including
+        # the sort direction) into the trigger
+        sort_js = """
+        <script>
+        (function() {
+            document.addEventListener('DOMContentLoaded', function() {
+                const table = document.getElementById('leaderboard-table');
+                if (!table) return;
+                table.addEventListener('click', function(e) {
+                    const arrow = e.target.closest('.sort-arrow');
+                    if (arrow) {
+                        const col = arrow.getAttribute('data-col');
+                        const asc = arrow.getAttribute('data-asc');
+                        // Always write a new value so a change event is forced
+                        const trigger = document.querySelector('#sort-leaderboard-trigger input');
+                        if (trigger) {
+                            trigger.value = col + '|' + asc + '|' + Date.now();
+                            trigger.dispatchEvent(new Event('input', { bubbles: true }));
+                            trigger.dispatchEvent(new Event('change', { bubbles: true }));
+                        }
+                    }
+                });
+            });
+        })();
+        </script>
+        """
+        # Wire sort-button clicks to update_leaderboard as well
+        def sort_trigger_change(col_val, models, columns, df_, prev_col, prev_asc):
+            print(f"[sort_trigger.change] col_val={col_val}, prev_col={prev_col}, prev_asc={prev_asc}")
+            col, asc = col_val.split('|')[0], col_val.split('|')[1].lower() == "true"
+            return sort_and_render(col, asc, models, columns, df_)
+
+        sort_trigger.change(
+            fn=sort_trigger_change,
+            inputs=[sort_trigger, model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
+            outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
+        )
+
+        # Inject the custom JS into the upper table
+        leaderboard_html_comp.style = None  # gr.HTML has no style parameter, so append the script below
+        leaderboard_html_comp.value += sort_js
+
+        # Pretty leaderboard preview (uses only 'Model' and 'Score' columns)
+        pretty_html = gr.HTML(value=render_pretty_leaderboard_html(df.round(3)))
+
+        # Define change functions for user interaction
+        # Every UI event refreshes via update_leaderboard -> sort_and_render -> render_leaderboard_html
+        def filter_and_sort_search(query, df, sort_col, sort_asc):
+            print(f"[filter_and_sort_search] sort_col={sort_col}, sort_asc={sort_asc}")
+            filtered_df = search_leaderboard(query, df, sort_col, sort_asc)
+            # Stash the sort state on the DataFrame so the header reflects it
+            filtered_df._sort_col = sort_col
+            filtered_df._sort_asc = sort_asc
+            return render_leaderboard_html(filtered_df), sort_col, sort_asc
+
+        def filter_and_sort_model(models, columns, df, sort_col, sort_asc):
+            print(f"[filter_and_sort_model] sort_col={sort_col}, sort_asc={sort_asc}")
+            filtered_df = update_leaderboard(models, columns, df, sort_col, sort_asc)
+            filtered_df._sort_col = sort_col
+            filtered_df._sort_asc = sort_asc
+            return render_leaderboard_html(filtered_df), sort_col, sort_asc
+
+        def filter_and_sort_column(models, columns, df, sort_col, sort_asc):
+            print(f"[filter_and_sort_column] sort_col={sort_col}, sort_asc={sort_asc}")
+            filtered_df = update_leaderboard(models, columns, df, sort_col, sort_asc)
+            filtered_df._sort_col = sort_col
+            filtered_df._sort_asc = sort_asc
+            return render_leaderboard_html(filtered_df), sort_col, sort_asc
+
+        search_box.change(
+            fn=filter_and_sort_search,
+            inputs=[search_box, df_state, sort_col_state, sort_asc_state],
+            outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
+        )
+
+        group_selector.change(fn=update_modelselector_group, inputs=[group_selector, df_state], outputs=model_selector)
+        model_selector.change(
+            fn=filter_and_sort_model,
+            inputs=[model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
+            outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
+        )
+
+        # Keep the latest sort_col/sort_asc when column_selector changes too
+        column_selector.change(
+            fn=filter_and_sort_column,
+            inputs=[model_selector, column_selector, df_state, sort_col_state, sort_asc_state],
+            outputs=[leaderboard_html_comp, sort_col_state, sort_asc_state]
+        )
+
+        return {
+            "search_box": search_box,
+            "group_selector": group_selector,
+            "column_selector": column_selector,
+            "model_selector": model_selector,
+            "leaderboard_html_comp": leaderboard_html_comp,
+            "sort_trigger": sort_trigger,
+            "df_state": df_state,
+            "pretty_html": pretty_html
+        }
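One caveat about the sort bridge in `create_leaderboard_tab`: HTML assigned to a `gr.HTML` component is inserted via `innerHTML`, and browsers do not execute `<script>` tags added that way, so the appended `sort_js` may never run. Routing the script through the Blocks constructor is one workaround; this is a sketch under the assumption of a recent Gradio release where `Blocks` accepts `head`:

```python
# Hedged sketch: load the sort script via Blocks(head=...) so it executes.
import gradio as gr

sort_js = "<script>console.log('sort handler installed');</script>"

with gr.Blocks(head=sort_js) as demo:  # assumption: Gradio version with `head`
    trigger = gr.Textbox(visible=False, elem_id="sort-leaderboard-trigger")
    trigger.change(lambda v: print("sort request:", v), inputs=trigger)
```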
utils.py CHANGED
@@ -25,4 +25,18 @@ def get_profile_and_organizations(profile: gr.OAuthProfile | None, oauth_token:
     else:
         output_org = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
 
-    return output_profile, output_org
+    return output_profile, output_org
+
+def download_with_restart(snapshot_download_func, repo_id, local_dir, repo_type, token, restart_func):
+    try:
+        print(local_dir)
+        snapshot_download_func(
+            repo_id=repo_id,
+            local_dir=local_dir,
+            repo_type=repo_type,
+            tqdm_class=None,
+            etag_timeout=30,
+            token=token
+        )
+    except Exception:
+        restart_func()
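Usage of the new helper mirrors the call sites in `app.py`; the repo id below is illustrative, while the real app passes `QUEUE_REPO`/`RESULTS_REPO` from `src/envs.py`:

```python
# Illustrative usage sketch of download_with_restart.
from huggingface_hub import snapshot_download
from utils import download_with_restart

download_with_restart(
    snapshot_download,
    repo_id="coms1580/requests",  # assumption: illustrative dataset id
    local_dir="./eval-queue",
    repo_type="dataset",
    token=None,                   # the real app passes HF_TOKEN
    restart_func=lambda: print("restarting space..."),
)
```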