Spaces:

MiniAppBench
/

Leaderboard

Running

App Files Community

ha251 committed on Mar 3

Commit

6c674b5

verified ·

1 Parent(s): 82fe3ca

Update miniapp_leaderboard.py

Browse files

Files changed (1) hide show

miniapp_leaderboard.py +24 -9

miniapp_leaderboard.py CHANGED Viewed

@@ -162,13 +162,21 @@ def submit(model_name, model_family, zip_file, profile: gr.OAuthProfile):
 with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     gr.Markdown(f"# {APP_NAME} Leaderboard")
     gr.Markdown("""
-    ### Data
     MiniAppBench is the first comprehensive benchmark designed to evaluate principle-driven, interactive application generation. Unlike prior benchmarks that emphasize static UI layouts or isolated algorithmic code snippets, MiniAppBench targets **MiniApps**—HTML-based applications that require both faithful visual rendering and non-trivial interaction logic.
     The dataset is split into two subsets: **validation (100 instances)** and **test (400 instances)**, and can be accessed at **[MiniAppBench dataset](https://huggingface.co/datasets/MiniAppBench/Dataset)**. The **validation** set includes publicly available **evaluation references** to support reproducible experiments, while the **test** set keeps the references hidden to enable unbiased evaluation.
     """)
     leaderboard = gr.Dataframe(
         value=pd.DataFrame(columns=COLUMNS),  # 启动不访问Hub
         interactive=False,
@@ -185,16 +193,23 @@ with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     gr.Markdown(
         """
-        **Submission requirements**
-        - Please **sign in with Hugging Face** before submitting.
-        - **One submission per user per day** (UTC).
-        - Upload a **.zip** file only.
-        - The `.zip` must contain the HTML outputs for the **test set queries**.
-          - Each file should be named using the query index: `<index>.html` (e.g., `1.html`, `2.html`, ...).
-        - After you submit, we will update the result in 3 days.
-        """,
     )
     model_name = gr.Textbox(label="Model name", placeholder="e.g. MyModel v1")
     model_family = gr.Textbox(label="Model family", placeholder="e.g. Llama / Qwen / InternLM ...")
     zip_file = gr.File(label="Upload zip (.zip only)", file_types=[".zip"])

 with gr.Blocks(title=f"{APP_NAME} leaderboard") as demo:
     gr.Markdown(f"# {APP_NAME} Leaderboard")
     gr.Markdown("""
+    ## Data
     MiniAppBench is the first comprehensive benchmark designed to evaluate principle-driven, interactive application generation. Unlike prior benchmarks that emphasize static UI layouts or isolated algorithmic code snippets, MiniAppBench targets **MiniApps**—HTML-based applications that require both faithful visual rendering and non-trivial interaction logic.
     The dataset is split into two subsets: **validation (100 instances)** and **test (400 instances)**, and can be accessed at **[MiniAppBench dataset](https://huggingface.co/datasets/MiniAppBench/Dataset)**. The **validation** set includes publicly available **evaluation references** to support reproducible experiments, while the **test** set keeps the references hidden to enable unbiased evaluation.
     """)
+    gr.Markdown(
+    """
+    ## Leaderboard
+    All results shown on this leaderboard are evaluated on the **test split** of MiniAppBench.
+    """,
+    )
     leaderboard = gr.Dataframe(
         value=pd.DataFrame(columns=COLUMNS),  # 启动不访问Hub
         interactive=False,
     gr.Markdown(
         """
+    **Submission requirements**
+    - Please **sign in with Hugging Face** before submitting.
+    - **One submission per user per day (UTC)**.
+    - Upload a **.zip** file only.
+    - The `.zip` must contain the HTML outputs for the **test set queries**.
+      - Each file should be named using the query index: `<index>.html` (e.g., `1.html`, `2.html`, ...).
+    - We may contact you via email for verification and request additional materials. Please be prepared to provide:
+      - **Model access** (one of the following):
+        - Preferred: an **inference API endpoint** we can use to reproduce the results.
+        - Alternatively: **model checkpoints (ckpts)** plus clear **deployment / inference instructions** (environment, dependencies, and how to run).
+      - **A related paper**, if available (e.g., an **arXiv link** or a PDF).
+    - After you submit, we will update the results within **3 days**.
+    """,
     )
     model_name = gr.Textbox(label="Model name", placeholder="e.g. MyModel v1")
     model_family = gr.Textbox(label="Model family", placeholder="e.g. Llama / Qwen / InternLM ...")
     zip_file = gr.File(label="Upload zip (.zip only)", file_types=[".zip"])