Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
| 8 |
import pandas as pd
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import traceback # Import traceback for detailed error logging
|
|
|
|
| 11 |
|
| 12 |
# Cache to avoid reloading the model
|
| 13 |
model_cache = {}
|
|
@@ -53,7 +54,7 @@ def get_all_benchmark_options():
|
|
| 53 |
# Initialize these once globally when the app starts
|
| 54 |
ALL_BENCHMARK_SUBJECTS, GRADIO_DROPDOWN_OPTIONS = get_all_benchmark_options()
|
| 55 |
|
| 56 |
-
|
| 57 |
def load_model(model_id):
|
| 58 |
"""
|
| 59 |
Loads a Hugging Face model and its tokenizer, then creates a text-generation pipeline.
|
|
@@ -186,7 +187,7 @@ def evaluate_single_subject(generator, dataset_id, subject, sample_count, progre
|
|
| 186 |
accuracy = (correct_count / len(dataset)) * 100 if len(dataset) > 0 else 0
|
| 187 |
return accuracy, subject_results
|
| 188 |
|
| 189 |
-
|
| 190 |
def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=gr.Progress()):
|
| 191 |
"""
|
| 192 |
Main function to orchestrate the evaluation process.
|
|
@@ -298,7 +299,7 @@ def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=
|
|
| 298 |
gr.Info("Evaluation completed successfully!")
|
| 299 |
return score_string, \
|
| 300 |
gr.update(value="", visible=False), gr.update(visible=False), \
|
| 301 |
-
gr.update(visible=
|
| 302 |
|
| 303 |
except Exception as e:
|
| 304 |
error_message = str(e)
|
|
@@ -616,4 +617,4 @@ with gr.Blocks(css="""
|
|
| 616 |
demo.load(load_leaderboard, inputs=[], outputs=[leaderboard_plot_output, leaderboard_table_output])
|
| 617 |
|
| 618 |
# Launch the Gradio app
|
| 619 |
-
demo.launch()
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import traceback # Import traceback for detailed error logging
|
| 11 |
+
import spaces # Import the spaces library
|
| 12 |
|
| 13 |
# Cache to avoid reloading the model
|
| 14 |
model_cache = {}
|
|
|
|
| 54 |
# Initialize these once globally when the app starts
|
| 55 |
ALL_BENCHMARK_SUBJECTS, GRADIO_DROPDOWN_OPTIONS = get_all_benchmark_options()
|
| 56 |
|
| 57 |
+
@spaces.GPU() # Decorator to ensure this function runs on GPU if available
|
| 58 |
def load_model(model_id):
|
| 59 |
"""
|
| 60 |
Loads a Hugging Face model and its tokenizer, then creates a text-generation pipeline.
|
|
|
|
| 187 |
accuracy = (correct_count / len(dataset)) * 100 if len(dataset) > 0 else 0
|
| 188 |
return accuracy, subject_results
|
| 189 |
|
| 190 |
+
@spaces.GPU() # Decorator to ensure this function runs on GPU if available
|
| 191 |
def run_evaluation(model_id, selected_benchmark_subject, sample_count, progress=gr.Progress()):
|
| 192 |
"""
|
| 193 |
Main function to orchestrate the evaluation process.
|
|
|
|
| 299 |
gr.Info("Evaluation completed successfully!")
|
| 300 |
return score_string, \
|
| 301 |
gr.update(value="", visible=False), gr.update(visible=False), \
|
| 302 |
+
gr.update(visible=True), gr.update(visible=True), gr.update(value=formatted_details, visible=False)
|
| 303 |
|
| 304 |
except Exception as e:
|
| 305 |
error_message = str(e)
|
|
|
|
| 617 |
demo.load(load_leaderboard, inputs=[], outputs=[leaderboard_plot_output, leaderboard_table_output])
|
| 618 |
|
| 619 |
# Launch the Gradio app
|
| 620 |
+
demo.launch()
|