ha251 committed on
Commit
31001ef
·
verified ·
1 Parent(s): de2f793

Update miniapp_leaderboard.py

Browse files
Files changed (1) hide show
  1. miniapp_leaderboard.py +106 -10
miniapp_leaderboard.py CHANGED
@@ -42,13 +42,13 @@ def _slug(s: str):
42
 
43
  def _load_df(prefix: str):
44
  if not HF_TOKEN or not LEADERBOARD_DATASET:
45
- return pd.DataFrame(columns=COLUMNS)
46
 
47
  api = _api()
48
  try:
49
  files = api.list_repo_files(repo_id=LEADERBOARD_DATASET, repo_type="dataset")
50
  except Exception:
51
- return pd.DataFrame(columns=COLUMNS)
52
 
53
  files = [f for f in files if f.startswith(prefix) and f.endswith(".json")]
54
  rows = []
@@ -67,9 +67,10 @@ def _load_df(prefix: str):
67
  continue
68
 
69
  if not rows:
70
- return pd.DataFrame(columns=COLUMNS)
71
 
72
  df = pd.DataFrame(rows)
 
73
  for c in COLUMNS:
74
  if c not in df.columns:
75
  df[c] = ""
@@ -77,9 +78,17 @@ def _load_df(prefix: str):
77
  for c in NUMERIC_COLS:
78
  df[c] = pd.to_numeric(df[c], errors="coerce")
79
 
80
- df = df.sort_values(by="avg", ascending=False)
81
- return df[COLUMNS]
 
 
 
 
 
 
 
82
 
 
83
 
84
  def refresh():
85
  return _load_df(APPROVED_PREFIX)
@@ -134,34 +143,121 @@ def submit(model_name, model_family, email, zip_file):
134
 
135
  return "Submitted. Waiting for review.", refresh()
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
139
- gr.Markdown(f"# {APP_NAME} Leaderboard")
140
 
141
- # 占满横向宽度
 
 
 
 
 
 
 
 
 
142
  leaderboard = gr.Dataframe(
143
  value=_load_df(APPROVED_PREFIX),
144
  interactive=False,
145
  wrap=True,
146
- elem_classes="full-width",
147
  )
148
 
149
  refresh_btn = gr.Button("Refresh")
150
 
151
- gr.Markdown("## Submit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  model_name = gr.Textbox(label="Model name")
154
  model_family = gr.Textbox(label="Model family")
155
  email = gr.Textbox(label="Email")
156
  zip_file = gr.File(label="Upload zip", file_types=[".zip"])
 
157
  submit_btn = gr.Button("Submit", variant="primary")
158
  status = gr.Markdown()
159
 
160
  refresh_btn.click(refresh, outputs=[leaderboard])
 
161
  submit_btn.click(
162
  submit,
163
  inputs=[model_name, model_family, email, zip_file],
164
  outputs=[status, leaderboard],
165
  )
166
 
 
167
  demo.launch()
 
42
 
43
  def _load_df(prefix: str):
44
  if not HF_TOKEN or not LEADERBOARD_DATASET:
45
+ return pd.DataFrame(columns=["rank"] + COLUMNS)
46
 
47
  api = _api()
48
  try:
49
  files = api.list_repo_files(repo_id=LEADERBOARD_DATASET, repo_type="dataset")
50
  except Exception:
51
+ return pd.DataFrame(columns=["rank"] + COLUMNS)
52
 
53
  files = [f for f in files if f.startswith(prefix) and f.endswith(".json")]
54
  rows = []
 
67
  continue
68
 
69
  if not rows:
70
+ return pd.DataFrame(columns=["rank"] + COLUMNS)
71
 
72
  df = pd.DataFrame(rows)
73
+
74
  for c in COLUMNS:
75
  if c not in df.columns:
76
  df[c] = ""
 
78
  for c in NUMERIC_COLS:
79
  df[c] = pd.to_numeric(df[c], errors="coerce")
80
 
81
+ # avg 排序
82
+ df = df.sort_values(by="avg", ascending=False).reset_index(drop=True)
83
+
84
+ # 自动生成排名
85
+ df.insert(0, "rank", df.index + 1)
86
+
87
+ # 加 medal
88
+ medals = {1: "🥇", 2: "🥈", 3: "🥉"}
89
+ df["rank"] = df["rank"].apply(lambda x: f"{medals.get(x, '')} {x}")
90
 
91
+ return df[["rank"] + COLUMNS]
92
 
93
  def refresh():
94
  return _load_df(APPROVED_PREFIX)
 
143
 
144
  return "Submitted. Waiting for review.", refresh()
145
 
146
+ custom_css = """
147
+ .gradio-container {
148
+ max-width: 100% !important;
149
+ padding-left: 2rem !important;
150
+ padding-right: 2rem !important;
151
+ }
152
+
153
+ thead tr th {
154
+ position: sticky;
155
+ top: 0;
156
+ background: white !important;
157
+ z-index: 10;
158
+ }
159
+
160
+ table {
161
+ font-size: 14px;
162
+ }
163
+
164
+ @media (max-width: 768px) {
165
+ table {
166
+ font-size: 12px;
167
+ }
168
+ }
169
+ """
170
+
171
+ with gr.Blocks(
172
+ title=f"{APP_NAME} leaderboard",
173
+ css=custom_css,
174
+ fill_width=True,
175
+ ) as demo:
176
+
177
+ # =============================
178
+ # 标题
179
+ # =============================
180
+ gr.Markdown(f"# {APP_NAME} Benchmark Leaderboard")
181
+
182
+ # =============================
183
+ # 学术风格说明
184
+ # =============================
185
+ gr.Markdown(
186
+ """
187
+ ### Overview
188
+
189
+ This leaderboard reports performance on **MiniAppBench**,
190
+ a benchmark designed to evaluate model capability across
191
+ multi-difficulty application scenarios.
192
+
193
+ ### Evaluation Protocol
194
+
195
+ All submissions are evaluated under a standardized pipeline.
196
+ Each model is tested on three difficulty tiers:
197
+
198
+ - **Easy**
199
+ - **Mid**
200
+ - **Hard**
201
+
202
+ The final **Average (avg)** score is computed as the unweighted mean
203
+ across all tiers.
204
 
205
+ ### Ranking Policy
 
206
 
207
+ Models are ranked by **avg score (descending)**.
208
+ Ties are broken by Hard score, then Mid score.
209
+
210
+ Only reviewed and approved submissions are displayed.
211
+ """
212
+ )
213
+
214
+ # =============================
215
+ # Leaderboard 表格
216
+ # =============================
217
  leaderboard = gr.Dataframe(
218
  value=_load_df(APPROVED_PREFIX),
219
  interactive=False,
220
  wrap=True,
221
+ height=600,
222
  )
223
 
224
  refresh_btn = gr.Button("Refresh")
225
 
226
+ # =============================
227
+ # Submission
228
+ # =============================
229
+ gr.Markdown("---")
230
+
231
+ gr.Markdown(
232
+ """
233
+ ## Submission Guidelines
234
+
235
+ Please upload:
236
+
237
+ - A `.zip` file containing your model artifacts
238
+ - Model name
239
+ - Model family
240
+ - Contact email
241
+
242
+ Submissions will undergo verification before appearing on the leaderboard.
243
+ """
244
+ )
245
 
246
  model_name = gr.Textbox(label="Model name")
247
  model_family = gr.Textbox(label="Model family")
248
  email = gr.Textbox(label="Email")
249
  zip_file = gr.File(label="Upload zip", file_types=[".zip"])
250
+
251
  submit_btn = gr.Button("Submit", variant="primary")
252
  status = gr.Markdown()
253
 
254
  refresh_btn.click(refresh, outputs=[leaderboard])
255
+
256
  submit_btn.click(
257
  submit,
258
  inputs=[model_name, model_family, email, zip_file],
259
  outputs=[status, leaderboard],
260
  )
261
 
262
+
263
  demo.launch()