Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
| 8 |
import pandas as pd
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import traceback # Import traceback for detailed error logging
|
|
|
|
| 11 |
|
| 12 |
# Cache to avoid reloading the model
|
| 13 |
model_cache = {}
|
|
@@ -53,7 +54,7 @@ def get_all_benchmark_options():
|
|
| 53 |
# Initialize these once globally when the app starts
|
| 54 |
ALL_BENCHMARK_SUBJECTS, GRADIO_DROPDOWN_OPTIONS = get_all_benchmark_options()
|
| 55 |
|
| 56 |
-
|
| 57 |
def load_model(model_id):
|
| 58 |
"""
|
| 59 |
Loads a Hugging Face model and its tokenizer, then creates a text-generation pipeline.
|
|
@@ -186,7 +187,7 @@ def evaluate_single_subject(generator, dataset_id, subject, sample_count, progre
|
|
| 186 |
accuracy = (correct_count / len(dataset)) * 100 if len(dataset) > 0 else 0
|
| 187 |
return accuracy, subject_results
|
| 188 |
|
| 189 |
-
|
| 190 |
def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=gr.Progress()):
|
| 191 |
"""
|
| 192 |
Main function to orchestrate the evaluation process.
|
|
@@ -298,7 +299,7 @@ def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=
|
|
| 298 |
gr.Info("Evaluation completed successfully!")
|
| 299 |
return score_string, \
|
| 300 |
gr.update(value="", visible=False), gr.update(visible=False), \
|
| 301 |
-
gr.update(visible=
|
| 302 |
|
| 303 |
except Exception as e:
|
| 304 |
error_message = str(e)
|
|
@@ -616,4 +617,4 @@ with gr.Blocks(css="""
|
|
| 616 |
demo.load(load_leaderboard, inputs=[], outputs=[leaderboard_plot_output, leaderboard_table_output])
|
| 617 |
|
| 618 |
# Launch the Gradio app
|
| 619 |
-
demo.launch()
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import traceback # Import traceback for detailed error logging
|
| 11 |
+
import spaces # Import the spaces library
|
| 12 |
|
| 13 |
# Cache to avoid reloading the model
|
| 14 |
model_cache = {}
|
|
|
|
| 54 |
# Initialize these once globally when the app starts
|
| 55 |
ALL_BENCHMARK_SUBJECTS, GRADIO_DROPDOWN_OPTIONS = get_all_benchmark_options()
|
| 56 |
|
| 57 |
+
@spaces.GPU() # Decorator to ensure this function runs on GPU if available
|
| 58 |
def load_model(model_id):
|
| 59 |
"""
|
| 60 |
Loads a Hugging Face model and its tokenizer, then creates a text-generation pipeline.
|
|
|
|
| 187 |
accuracy = (correct_count / len(dataset)) * 100 if len(dataset) > 0 else 0
|
| 188 |
return accuracy, subject_results
|
| 189 |
|
| 190 |
+
@spaces.GPU() # Decorator to ensure this function runs on GPU if available
|
| 191 |
def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=gr.Progress()):
|
| 192 |
"""
|
| 193 |
Main function to orchestrate the evaluation process.
|
|
|
|
| 299 |
gr.Info("Evaluation completed successfully!")
|
| 300 |
return score_string, \
|
| 301 |
gr.update(value="", visible=False), gr.update(visible=False), \
|
| 302 |
+
gr.update(visible=True), gr.update(visible=True), gr.update(value=formatted_details, visible=False)
|
| 303 |
|
| 304 |
except Exception as e:
|
| 305 |
error_message = str(e)
|
|
|
|
| 617 |
demo.load(load_leaderboard, inputs=[], outputs=[leaderboard_plot_output, leaderboard_table_output])
|
| 618 |
|
| 619 |
# Launch the Gradio app
|
| 620 |
+
demo.launch()
|