Update app.py
app.py (CHANGED)
@@ -168,6 +168,20 @@ if not HF_TOKEN:
 #     "Correct Predictions", "Total Questions", "Timestamp"
 # ]).to_csv(LEADERBOARD_FILE, index=False)
 
+# def initialize_leaderboard_file():
+#     """
+#     Ensure the leaderboard file exists and has the correct headers.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE):
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Correct Predictions",
+#             "Total Questions", "Timestamp", "Team Name"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+#     elif os.stat(LEADERBOARD_FILE).st_size == 0:
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Correct Predictions",
+#             "Total Questions", "Timestamp", "Team Name"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
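Note: both the commented-out copy kept above and the live initialize_leaderboard_file() that follows rely on the same pattern: writing a DataFrame that has column names but no rows yields a header-only CSV. A minimal standalone sketch of that pattern (LEADERBOARD_FILE is assumed to be a module-level constant in app.py):

    # Minimal sketch of the header-initialization pattern; LEADERBOARD_FILE
    # is an assumed constant, not taken verbatim from this diff.
    import os
    import pandas as pd

    LEADERBOARD_FILE = "leaderboard.csv"
    COLUMNS = ["Model Name", "Overall Accuracy", "Correct Predictions",
               "Total Questions", "Timestamp", "Team Name"]

    # Create the file with headers if it is missing or empty, so later
    # pd.read_csv(LEADERBOARD_FILE) calls never fail on a zero-byte file.
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        pd.DataFrame(columns=COLUMNS).to_csv(LEADERBOARD_FILE, index=False)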
@@ -181,8 +195,8 @@ def initialize_leaderboard_file():
         pd.DataFrame(columns=[
             "Model Name", "Overall Accuracy", "Correct Predictions",
             "Total Questions", "Timestamp", "Team Name"
-        ]).to_csv(LEADERBOARD_FILE, index=False)
-
+        ]).to_csv(LEADERBOARD_FILE, index=False)
+
 def initialize_leaderboard_pro_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
@@ -430,6 +444,63 @@ def load_leaderboard_pro():
 # except Exception as e:
 #     return f"Error during evaluation: {str(e)}", load_leaderboard()
 
+# def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
+#     try:
+#         ground_truth_path = hf_hub_download(
+#             repo_id="SondosMB/ground-truth-dataset",
+#             filename="ground_truth.csv",
+#             repo_type="dataset",
+#             use_auth_token=True
+#         )
+#         ground_truth_df = pd.read_csv(ground_truth_path)
+#     except FileNotFoundError:
+#         return "Ground truth file not found in the dataset repository.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error loading ground truth: {e}", load_leaderboard()
+
+#     if not prediction_file:
+#         return "Prediction file not uploaded.", load_leaderboard()
+
+#     try:
+#         #load prediction file
+#         predictions_df = pd.read_csv(prediction_file.name)
+#         # Validate required columns in prediction file
+#         required_columns = ['question_id', 'predicted_answer']
+#         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+#         if missing_columns:
+#             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+#                     load_leaderboard())
+
+#         # Validate 'Answer' column in ground truth file
+#         if 'Answer' not in ground_truth_df.columns:
+#             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
+#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+#         total_predictions = len(merged_df)
+
+#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+
+#         results = {
+#             'model_name': model_name if model_name else "Unknown Model",
+#             'overall_accuracy': overall_accuracy,
+#             'correct_predictions': correct_predictions,
+#             'total_questions': total_predictions,
+#             'Team_name': Team_name if Team_name else "Unknown Team",
+#         }
+
+#         if add_to_leaderboard:
+#             update_leaderboard(results)
+#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
+#         else:
+#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+
+#     except Exception as e:
+#         return f"Error during evaluation: {str(e)}", load_leaderboard()
+# initialize_leaderboard_file()
+
 def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
     try:
         ground_truth_path = hf_hub_download(
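Note: the ground-truth fetch that both the commented-out and the live evaluate_predictions perform reduces to the sketch below. The repo id and filename are taken from the diff; using token=True in place of the older use_auth_token=True is an assumption about how the Space authenticates:

    # Sketch of the ground-truth download: hf_hub_download caches the CSV
    # locally and returns its filesystem path.
    import pandas as pd
    from huggingface_hub import hf_hub_download

    ground_truth_path = hf_hub_download(
        repo_id="SondosMB/ground-truth-dataset",
        filename="ground_truth.csv",
        repo_type="dataset",
        token=True,  # read the locally stored HF token; assumed equivalent to use_auth_token=True
    )
    ground_truth_df = pd.read_csv(ground_truth_path)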
@@ -455,7 +526,7 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
         if missing_columns:
             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
-                    load_leaderboard())
+                    load_leaderboard_pro())
 
         # Validate 'Answer' column in ground truth file
         if 'Answer' not in ground_truth_df.columns:
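Note: immediately after this validation, evaluate_predictions scores the upload by inner-joining predictions to ground truth on question_id. A self-contained sketch of that scoring step (clean_answer is stubbed here; its real normalization rules live in app.py):

    # Sketch of the scoring step shared by the evaluators: join on question_id,
    # normalize answers, count matches over all merged rows.
    import pandas as pd

    def clean_answer(ans):
        # assumption: the real clean_answer normalizes case/whitespace
        return str(ans).strip().upper() if pd.notna(ans) else None

    predictions_df = pd.DataFrame({"question_id": [1, 2, 3],
                                   "predicted_answer": ["a", "B", None]})
    ground_truth_df = pd.DataFrame({"question_id": [1, 2, 3],
                                    "Answer": ["A", "C", "D"]})

    merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
    merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

    valid_predictions = merged_df.dropna(subset=["pred_answer"])
    correct_predictions = int((valid_predictions["pred_answer"] == valid_predictions["Answer"]).sum())
    total_predictions = len(merged_df)
    overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    print(correct_predictions, total_predictions, overall_accuracy)  # 1 3 0.333...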
@@ -484,9 +555,7 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
 
     except Exception as e:
-        return f"Error during evaluation: {str(e)}", load_leaderboard()
-initialize_leaderboard_file()
-
+        return f"Error during evaluation: {str(e)}", load_leaderboard(),initialize_leaderboard_file()
 
 
 def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leaderboard):
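Note on the rewritten except branch: folding initialize_leaderboard_file() into the return statement still runs the initializer, but its None result becomes a third tuple element, so this error path now yields three values instead of two. A stubbed demonstration:

    # Stubbed demonstration: a call placed inside a return tuple contributes
    # its own return value (None here) as an extra element.
    def initialize_leaderboard_file():
        return None  # stub: the real function writes CSV headers and returns nothing

    def evaluate_predictions():
        try:
            raise ValueError("boom")  # force the error path
        except Exception as e:
            return f"Error during evaluation: {str(e)}", "leaderboard", initialize_leaderboard_file()

    print(evaluate_predictions())
    # ('Error during evaluation: boom', 'leaderboard', None)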
@@ -936,16 +1005,62 @@ with gr.Blocks(css=css_tech_theme) as demo:
 
 
 
-    def handle_evaluation(file, model_name, Team_name):
-        print("π Evaluation function started 1")  # Debugging print
+    # def handle_evaluation(file, model_name, Team_name):
+    #     print("π Evaluation function started 1")  # Debugging print
+    #     if not file:
+    #         print("π Evaluation function started 2")  # Debugging print
+    #         return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
+    #     if not model_name or model_name.strip() == "":
+    #         print("π Evaluation function started 3")  # Debugging print
+    #         return "Error: Please enter a model name.", 0, gr.update(visible=False)
+    #     if not Team_name or Team_name.strip() == "":
+    #         print("π Evaluation function started 4")  # Debugging print
+    #         return "Error: Please enter a Team name.", 0, gr.update(visible=False)
+
+    #     try:
+    #         # Load predictions file
+    #         predictions_df = pd.read_csv(file.name)
+
+    #         # Validate required columns
+    #         required_columns = ['question_id', 'predicted_answer']
+    #         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+    #         if missing_columns:
+    #             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+    #                     0, gr.update(visible=False))
+
+    #         # Load ground truth
+    #         try:
+    #             ground_truth_path = hf_hub_download(
+    #                 repo_id="SondosMB/ground-truth-dataset",
+    #                 filename="ground_truth.csv",
+    #                 repo_type="dataset",
+    #                 use_auth_token=True
+    #             )
+    #             ground_truth_df = pd.read_csv(ground_truth_path)
+    #         except Exception as e:
+    #             return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
+
+    #         # Perform evaluation calculations
+    #         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+    #         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+    #         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+    #         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+    #         total_predictions = len(merged_df)
+
+    #         overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
+
+    #         return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
+
+    #     except Exception as e:
+    #         return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+
+    def handle_evaluation_pro(file, model_name, Team_name):
         if not file:
-            print("π Evaluation function started 2")  # Debugging print
             return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
         if not model_name or model_name.strip() == "":
-            print("π Evaluation function started 3")  # Debugging print
             return "Error: Please enter a model name.", 0, gr.update(visible=False)
         if not Team_name or Team_name.strip() == "":
-            print("π Evaluation function started 4")  # Debugging print
             return "Error: Please enter a Team name.", 0, gr.update(visible=False)
 
         try:
@@ -984,9 +1099,9 @@ with gr.Blocks(css=css_tech_theme) as demo:
             return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
 
         except Exception as e:
-            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
 
-    def
+    def handle_evaluation(file, model_name, Team_name):
         if not file:
             return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
         if not model_name or model_name.strip() == "":
@@ -1030,8 +1145,7 @@ with gr.Blocks(css=css_tech_theme) as demo:
             return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
 
         except Exception as e:
-            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
-
+            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
 
 
 
@@ -1060,6 +1174,12 @@ with gr.Blocks(css=css_tech_theme) as demo:
         outputs=[eval_status, overall_accuracy_display, submit_button_pro],
     )
 
+    eval_button.click(
+        handle_evaluation,
+        inputs=[file_input, model_name_input,Team_name_input],
+        outputs=[eval_status, overall_accuracy_display, submit_button_pro],
+    )
+
     submit_button_pro.click(
         handle_submission_pro,
         inputs=[file_input, model_name_input,Team_name_input],
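Note: the restored eval_button wiring maps handle_evaluation's 3-tuple return positionally onto eval_status, overall_accuracy_display, and submit_button_pro, with gr.update(visible=...) driving the button. A minimal runnable sketch of the same wiring (component definitions are illustrative stand-ins, not the Space's real layout):

    # Minimal Gradio sketch: the handler's three return values are assigned
    # positionally to the three outputs, and gr.update(visible=...) toggles
    # the submit button.
    import gradio as gr

    def handle_evaluation(file, model_name, team_name):
        if not file:
            return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
        return "Evaluation completed successfully.", 87.5, gr.update(visible=True)  # dummy accuracy

    with gr.Blocks() as demo:
        file_input = gr.File(label="Prediction file")
        model_name_input = gr.Textbox(label="Model name")
        Team_name_input = gr.Textbox(label="Team name")
        eval_button = gr.Button("Evaluate")
        eval_status = gr.Markdown()
        overall_accuracy_display = gr.Number(label="Overall accuracy")
        submit_button_pro = gr.Button("Submit to leaderboard", visible=False)

        eval_button.click(
            handle_evaluation,
            inputs=[file_input, model_name_input, Team_name_input],
            outputs=[eval_status, overall_accuracy_display, submit_button_pro],
        )

    if __name__ == "__main__":
        demo.launch()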