Update app.py
Browse files
app.py
CHANGED
|
@@ -308,19 +308,23 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
|
|
| 308 |
return f"Error during evaluation: {str(e)}", load_leaderboard()
|
| 309 |
|
| 310 |
initialize_leaderboard_file()
|
|
|
|
|
|
|
| 311 |
with gr.Blocks() as demo:
|
| 312 |
gr.Markdown("""
|
| 313 |
# Competition Title
|
| 314 |
### Welcome to the Competition Overview
|
|
|
|
| 315 |

|
| 316 |
Here you can submit your predictions, view the leaderboard, and track your performance!
|
| 317 |
""")
|
|
|
|
| 318 |
|
| 319 |
with gr.Tabs():
|
| 320 |
with gr.TabItem("📖 Overview"):
|
| 321 |
gr.Markdown("""
|
| 322 |
-
|
| 323 |
-
|
| 324 |
|
| 325 |
Evaluate the performance of mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields. Compete to showcase your model’s accuracy for real-world mobile scenarios.
|
| 326 |
|
|
@@ -380,7 +384,7 @@ For questions or support, contact us at: [Insert Email Address]
|
|
| 380 |
model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
|
| 381 |
|
| 382 |
with gr.Row():
|
| 383 |
-
overall_accuracy_display = gr.
|
| 384 |
add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
|
| 385 |
|
| 386 |
eval_button = gr.Button("Evaluate")
|
|
|
|
| 308 |
return f"Error during evaluation: {str(e)}", load_leaderboard()
|
| 309 |
|
| 310 |
initialize_leaderboard_file()
|
| 311 |
+
|
| 312 |
+
|
| 313 |
with gr.Blocks() as demo:
|
| 314 |
gr.Markdown("""
|
| 315 |
# Competition Title
|
| 316 |
### Welcome to the Competition Overview
|
| 317 |
+
<iframe src="Mobile-MMLU2_.pdf" width="100%" height="500px"></iframe>
|
| 318 |

|
| 319 |
Here you can submit your predictions, view the leaderboard, and track your performance!
|
| 320 |
""")
|
| 321 |
+
|
| 322 |
|
| 323 |
with gr.Tabs():
|
| 324 |
with gr.TabItem("📖 Overview"):
|
| 325 |
gr.Markdown("""
|
| 326 |
+
## Overview
|
| 327 |
+
# Welcome to the Mobile-MMLU Benchmark Competition
|
| 328 |
|
| 329 |
Evaluate the performance of mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields. Compete to showcase your model’s accuracy for real-world mobile scenarios.
|
| 330 |
|
|
|
|
| 384 |
model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
|
| 385 |
|
| 386 |
with gr.Row():
|
| 387 |
+
overall_accuracy_display = gr.Number(label="Overall Accuracy", interactive=False)
|
| 388 |
add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
|
| 389 |
|
| 390 |
eval_button = gr.Button("Evaluate")
|