Spaces:

MBZUAI-LLM
/

Mobile-MMLU-Challenge

Running

App Files Files Community

SondosMB committed on Mar 26

Commit

6c03d0d

verified ·

1 Parent(s): 5c06559

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -0

app.py CHANGED Viewed

@@ -533,6 +533,21 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
         # Validate 'Answer' column in ground truth file
         if 'Answer' not in ground_truth_df.columns:
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
@@ -590,6 +605,22 @@ def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leade
         # Validate 'Answer' column in ground truth file
         if 'Answer' not in ground_truth_df.columns:
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

         # Validate 'Answer' column in ground truth file
         if 'Answer' not in ground_truth_df.columns:
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
+        # **Check if questions match**
+        pred_question_ids = set(predictions_df['question_id'])
+        gt_question_ids = set(ground_truth_df['question_id'])
+        missing_in_gt = pred_question_ids - gt_question_ids
+        missing_in_pred = gt_question_ids - pred_question_ids
+        if missing_in_gt:
+            return f"Error: Some question IDs in predictions are missing from the ground truth: {missing_in_gt}", load_leaderboard_pro()
+        if missing_in_pred:
+            return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
         # Validate 'Answer' column in ground truth file
         if 'Answer' not in ground_truth_df.columns:
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
+        # **Check if questions match**
+        pred_question_ids = set(predictions_df['question_id'])
+        gt_question_ids = set(ground_truth_df['question_id'])
+        missing_in_gt = pred_question_ids - gt_question_ids
+        missing_in_pred = gt_question_ids - pred_question_ids
+        if missing_in_gt:
+            return f"Error: Some question IDs in predictions are missing from the ground truth: {missing_in_gt}", load_leaderboard_pro()
+        if missing_in_pred:
+            return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
+        # Merge and evaluate
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)