Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -296,7 +296,7 @@ def run_evaluation(model_id, benchmark_category, subject_name, sample_count, pro
|
|
| 296 |
gr.Error("An error occurred during evaluation.")
|
| 297 |
|
| 298 |
# Return updates for error state
|
| 299 |
-
return "Error occurred during evaluation. We'll evaluate for you
|
| 300 |
gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
|
| 301 |
gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
|
| 302 |
|
|
@@ -657,13 +657,13 @@ with gr.Blocks(css="""
|
|
| 657 |
with gr.Column(elem_classes="gr-box"):
|
| 658 |
model_id_input = gr.Textbox(
|
| 659 |
label="Your Hugging Face Model ID",
|
| 660 |
-
placeholder="e.g.,
|
| 661 |
interactive=True
|
| 662 |
)
|
| 663 |
|
| 664 |
# New Radio button for benchmark selection for evaluation
|
| 665 |
benchmark_selection_radio = gr.Radio(
|
| 666 |
-
["MMLU"
|
| 667 |
label="Select Benchmark Type",
|
| 668 |
value="MMLU", # Default selection
|
| 669 |
interactive=True,
|
|
@@ -675,20 +675,22 @@ with gr.Blocks(css="""
|
|
| 675 |
benchmark_subject_dropdown = gr.Dropdown(
|
| 676 |
label="Choose Subject", # Label changed to be more concise
|
| 677 |
choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
|
| 678 |
-
value="
|
| 679 |
interactive=True,
|
| 680 |
-
min_width=400
|
|
|
|
| 681 |
)
|
| 682 |
sample_count_slider = gr.Slider(
|
| 683 |
label="Number of Samples per Subject (1-100)",
|
| 684 |
minimum=1,
|
| 685 |
maximum=100,
|
| 686 |
-
value=
|
| 687 |
step=1,
|
| 688 |
interactive=True,
|
| 689 |
-
min_width=200
|
|
|
|
| 690 |
)
|
| 691 |
-
run_button = gr.Button("
|
| 692 |
|
| 693 |
gr.Markdown("<hr>") # Visual separator
|
| 694 |
|
|
@@ -701,29 +703,6 @@ with gr.Blocks(css="""
|
|
| 701 |
placeholder="Evaluation results will appear here."
|
| 702 |
)
|
| 703 |
|
| 704 |
-
# Container for debug info, initially hidden
|
| 705 |
-
with gr.Column(visible=False, elem_id="debug-error-column") as debug_error_column:
|
| 706 |
-
error_message_output = gr.Textbox(
|
| 707 |
-
label="Debug Information (Error Details)",
|
| 708 |
-
lines=10, interactive=False, elem_classes="gr-output-text", elem_id="error-message-output",
|
| 709 |
-
placeholder="Error details will appear here if an error occurs."
|
| 710 |
-
)
|
| 711 |
-
debug_button = gr.Button("π Hide Debug Info", visible=True, elem_id="debug-button", elem_classes="gr-button")
|
| 712 |
-
|
| 713 |
-
with gr.Row():
|
| 714 |
-
show_details_button = gr.Button("π Show Detailed Logs", visible=False, elem_id="show-details-button", elem_classes="gr-button")
|
| 715 |
-
download_button = gr.Button("📥 Download Full Evaluation Logs", visible=False, elem_id="download-button", elem_classes="gr-button")
|
| 716 |
-
|
| 717 |
-
# Detailed output, initially hidden
|
| 718 |
-
detail_output = gr.Textbox(
|
| 719 |
-
label="Detailed Evaluation Logs",
|
| 720 |
-
lines=20,
|
| 721 |
-
interactive=False,
|
| 722 |
-
elem_classes="gr-output-text",
|
| 723 |
-
placeholder="Detailed logs for each question will appear here upon successful evaluation.",
|
| 724 |
-
visible=False # Initially hidden
|
| 725 |
-
)
|
| 726 |
-
|
| 727 |
# Define button click actions
|
| 728 |
run_button.click(
|
| 729 |
run_evaluation,
|
|
@@ -748,12 +727,6 @@ with gr.Blocks(css="""
|
|
| 748 |
inputs=[detail_output], # Pass the component itself as input
|
| 749 |
outputs=[detail_output] # The component to update
|
| 750 |
)
|
| 751 |
-
# Change button text based on visibility
|
| 752 |
-
show_details_button.click(
|
| 753 |
-
lambda s: "π Hide Detailed Logs" if not s else "π Show Detailed Logs",
|
| 754 |
-
inputs=[detail_output],
|
| 755 |
-
outputs=[show_details_button]
|
| 756 |
-
)
|
| 757 |
|
| 758 |
# Toggle visibility of debug error column
|
| 759 |
debug_button.click(
|
|
|
|
| 296 |
gr.Error("An error occurred during evaluation.")
|
| 297 |
|
| 298 |
# Return updates for error state
|
| 299 |
+
return "Error occurred during evaluation. We'll evaluate for you if this persists - please open a community support tab for assistance.", \
|
| 300 |
gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
|
| 301 |
gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
|
| 302 |
|
|
|
|
| 657 |
with gr.Column(elem_classes="gr-box"):
|
| 658 |
model_id_input = gr.Textbox(
|
| 659 |
label="Your Hugging Face Model ID",
|
| 660 |
+
placeholder="e.g., ICONNAI/ICONN-1-Mini-Beta",
|
| 661 |
interactive=True
|
| 662 |
)
|
| 663 |
|
| 664 |
# New Radio button for benchmark selection for evaluation
|
| 665 |
benchmark_selection_radio = gr.Radio(
|
| 666 |
+
["MMLU"],
|
| 667 |
label="Select Benchmark Type",
|
| 668 |
value="MMLU", # Default selection
|
| 669 |
interactive=True,
|
|
|
|
| 675 |
benchmark_subject_dropdown = gr.Dropdown(
|
| 676 |
label="Choose Subject", # Label changed to be more concise
|
| 677 |
choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
|
| 678 |
+
value="all", # Default to ALL for MMLU initially
|
| 679 |
interactive=True,
|
| 680 |
+
min_width=400,
|
| 681 |
+
visible=False
|
| 682 |
)
|
| 683 |
sample_count_slider = gr.Slider(
|
| 684 |
label="Number of Samples per Subject (1-100)",
|
| 685 |
minimum=1,
|
| 686 |
maximum=100,
|
| 687 |
+
value=100,
|
| 688 |
step=1,
|
| 689 |
interactive=True,
|
| 690 |
+
min_width=200,
|
| 691 |
+
visible=False
|
| 692 |
)
|
| 693 |
+
run_button = gr.Button("Run Evaluation", elem_classes="gr-button")
|
| 694 |
|
| 695 |
gr.Markdown("<hr>") # Visual separator
|
| 696 |
|
|
|
|
| 703 |
placeholder="Evaluation results will appear here."
|
| 704 |
)
|
| 705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
# Define button click actions
|
| 707 |
run_button.click(
|
| 708 |
run_evaluation,
|
|
|
|
| 727 |
inputs=[detail_output], # Pass the component itself as input
|
| 728 |
outputs=[detail_output] # The component to update
|
| 729 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
|
| 731 |
# Toggle visibility of debug error column
|
| 732 |
debug_button.click(
|