Spaces:

MiniAppBench
/

Leaderboard

Running

ha251 committed on Mar 3

Commit

13aea08

verified ·

1 Parent(s): 5ef4585

Update miniapp_leaderboard.py

Files changed (1) hide show

miniapp_leaderboard.py CHANGED Viewed

@@ -161,6 +161,13 @@ def submit(model_name, model_family, zip_file, profile: gr.OAuthProfile):
 with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     gr.Markdown(f"# {APP_NAME} Leaderboard")
     leaderboard = gr.Dataframe(
         value=pd.DataFrame(columns=COLUMNS),  # 启动不访问Hub
@@ -173,24 +180,6 @@ with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     # 页面加载时自动刷新一次
     demo.load(refresh, outputs=[leaderboard])
-# with gr.Blocks(
-#     title=f"{APP_NAME} leaderboard",
-#     css="""
-#     .gradio-container { font-size: 18px; }
-#     /* 表格字号单独再加一层，确保生效 */
-#     .gradio-container table { font-size: 18px; }
-#     """
-# ) as demo:
-#     gr.Markdown(f"# {APP_NAME} Leaderboard")
-#     # leaderboard = gr.Dataframe(value=_load_df(APPROVED_PREFIX), interactive=False, wrap=True)
-#     leaderboard = gr.Dataframe(value=pd.DataFrame(columns=COLUMNS), interactive=False, wrap=True)
-#     demo.load(fn=refresh, outputs=[leaderboard])   # 页面打开自动加载一次
-#     refresh_btn.click(refresh, outputs=[leaderboard])
-#     refresh_btn = gr.Button("Refresh")
     gr.Markdown("## Submit")

 with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     gr.Markdown(f"# {APP_NAME} Leaderboard")
+gr.Markdown("""
+### Data
+MiniAppBench is the first comprehensive benchmark designed to evaluate principle-driven, interactive application generation. Unlike prior benchmarks that emphasize static UI layouts or isolated algorithmic code snippets, MiniAppBench targets **MiniApps**—HTML-based applications that require both faithful visual rendering and non-trivial interaction logic.
+The dataset is split into two subsets: **validation (100 instances)** and **test (400 instances)**, and can be accessed at **[MiniAppBench dataset](https://huggingface.co/datasets/MiniAppBench/Dataset)**. The **validation** set includes publicly available **evaluation references** to support reproducible experiments, while the **test** set keeps the references hidden to enable unbiased evaluation.
+""")
     leaderboard = gr.Dataframe(
         value=pd.DataFrame(columns=COLUMNS),  # 启动不访问Hub
     # 页面加载时自动刷新一次
     demo.load(refresh, outputs=[leaderboard])
     gr.Markdown("## Submit")