Spaces:
Running
Running
Update miniapp_leaderboard.py
Browse files- miniapp_leaderboard.py +7 -18
miniapp_leaderboard.py
CHANGED
|
@@ -161,6 +161,13 @@ def submit(model_name, model_family, zip_file, profile: gr.OAuthProfile):
|
|
| 161 |
|
| 162 |
with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
|
| 163 |
gr.Markdown(f"# {APP_NAME} Leaderboard")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
leaderboard = gr.Dataframe(
|
| 166 |
value=pd.DataFrame(columns=COLUMNS), # 启动不访问Hub
|
|
@@ -173,24 +180,6 @@ with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
|
|
| 173 |
|
| 174 |
# 页面加载时自动刷新一次
|
| 175 |
demo.load(refresh, outputs=[leaderboard])
|
| 176 |
-
# with gr.Blocks(
|
| 177 |
-
# title=f"{APP_NAME} leaderboard",
|
| 178 |
-
# css="""
|
| 179 |
-
# .gradio-container { font-size: 18px; }
|
| 180 |
-
# /* 表格字号单独再加一层,确保生效 */
|
| 181 |
-
# .gradio-container table { font-size: 18px; }
|
| 182 |
-
# """
|
| 183 |
-
# ) as demo:
|
| 184 |
-
|
| 185 |
-
# gr.Markdown(f"# {APP_NAME} Leaderboard")
|
| 186 |
-
|
| 187 |
-
# # leaderboard = gr.Dataframe(value=_load_df(APPROVED_PREFIX), interactive=False, wrap=True)
|
| 188 |
-
# leaderboard = gr.Dataframe(value=pd.DataFrame(columns=COLUMNS), interactive=False, wrap=True)
|
| 189 |
-
|
| 190 |
-
# demo.load(fn=refresh, outputs=[leaderboard]) # 页面打开自动加载一次
|
| 191 |
-
# refresh_btn.click(refresh, outputs=[leaderboard])
|
| 192 |
-
|
| 193 |
-
# refresh_btn = gr.Button("Refresh")
|
| 194 |
|
| 195 |
gr.Markdown("## Submit")
|
| 196 |
|
|
|
|
| 161 |
|
| 162 |
with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
|
| 163 |
gr.Markdown(f"# {APP_NAME} Leaderboard")
|
| 164 |
+
gr.Markdown("""
|
| 165 |
+
### Data
|
| 166 |
+
|
| 167 |
+
MiniAppBench is the first comprehensive benchmark designed to evaluate principle-driven, interactive application generation. Unlike prior benchmarks that emphasize static UI layouts or isolated algorithmic code snippets, MiniAppBench targets **MiniApps**—HTML-based applications that require both faithful visual rendering and non-trivial interaction logic.
|
| 168 |
+
|
| 169 |
+
The dataset is split into two subsets: **validation (100 instances)** and **test (400 instances)**, and can be accessed at **[MiniAppBench dataset](https://huggingface.co/datasets/MiniAppBench/Dataset)**. The **validation** set includes publicly available **evaluation references** to support reproducible experiments, while the **test** set keeps the references hidden to enable unbiased evaluation.
|
| 170 |
+
""")
|
| 171 |
|
| 172 |
leaderboard = gr.Dataframe(
|
| 173 |
value=pd.DataFrame(columns=COLUMNS), # 启动不访问Hub
|
|
|
|
| 180 |
|
| 181 |
# 页面加载时自动刷新一次
|
| 182 |
demo.load(refresh, outputs=[leaderboard])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
gr.Markdown("## Submit")
|
| 185 |
|