Update app.py
Browse files
app.py
CHANGED
|
@@ -641,8 +641,40 @@ with gr.Blocks(css=css_tech_theme) as demo:
|
|
| 641 |
eval_status = gr.Textbox(label="🛠️ Evaluation Status", interactive=False,scale=1,min_width=1200)
|
| 642 |
|
| 643 |
# Define the functions outside the `with` block
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
def handle_evaluation(file, model_name):
|
| 645 |
-
# Check if required inputs are provided
|
| 646 |
if not file:
|
| 647 |
return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
|
| 648 |
if not model_name or model_name.strip() == "":
|
|
@@ -652,27 +684,39 @@ with gr.Blocks(css=css_tech_theme) as demo:
|
|
| 652 |
# Load predictions file
|
| 653 |
predictions_df = pd.read_csv(file.name)
|
| 654 |
|
| 655 |
-
# Validate required columns
|
| 656 |
required_columns = ['question_id', 'predicted_answer']
|
| 657 |
missing_columns = [col for col in required_columns if col not in predictions_df.columns]
|
| 658 |
if missing_columns:
|
| 659 |
return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
|
| 660 |
0, gr.update(visible=False))
|
| 661 |
|
| 662 |
-
#
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
|
| 678 |
|
|
|
|
| 641 |
eval_status = gr.Textbox(label="🛠️ Evaluation Status", interactive=False,scale=1,min_width=1200)
|
| 642 |
|
| 643 |
# Define the functions outside the `with` block
|
| 644 |
+
# def handle_evaluation(file, model_name):
|
| 645 |
+
# # Check if required inputs are provided
|
| 646 |
+
# if not file:
|
| 647 |
+
# return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
|
| 648 |
+
# if not model_name or model_name.strip() == "":
|
| 649 |
+
# return "Error: Please enter a model name.", 0, gr.update(visible=False)
|
| 650 |
+
|
| 651 |
+
# try:
|
| 652 |
+
# # Load predictions file
|
| 653 |
+
# predictions_df = pd.read_csv(file.name)
|
| 654 |
+
|
| 655 |
+
# # Validate required columns in the prediction file
|
| 656 |
+
# required_columns = ['question_id', 'predicted_answer']
|
| 657 |
+
# missing_columns = [col for col in required_columns if col not in predictions_df.columns]
|
| 658 |
+
# if missing_columns:
|
| 659 |
+
# return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
|
| 660 |
+
# 0, gr.update(visible=False))
|
| 661 |
+
|
| 662 |
+
# # Perform evaluation
|
| 663 |
+
# status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard=False)
|
| 664 |
+
# if leaderboard.empty:
|
| 665 |
+
# overall_accuracy = 0
|
| 666 |
+
# else:
|
| 667 |
+
# overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
# # Show the submit button after successful evaluation
|
| 671 |
+
# return status, overall_accuracy, gr.update(visible=True)
|
| 672 |
+
|
| 673 |
+
# except Exception as e:
|
| 674 |
+
# # Handle unexpected errors
|
| 675 |
+
# return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
|
| 676 |
+
|
| 677 |
def handle_evaluation(file, model_name):
    """Evaluate an uploaded predictions CSV against the hosted ground-truth set.

    Args:
        file: Gradio file object for the uploaded predictions CSV; must contain
            'question_id' and 'predicted_answer' columns.
        model_name: Display name of the model being evaluated; must be non-empty.

    Returns:
        A 3-tuple consumed by the Gradio UI:
        (status message: str, overall accuracy in percent: float,
         gr.update controlling visibility of the submit button).
        On any error the accuracy is 0 and the submit button stays hidden.
    """
    # Guard clauses: validate inputs before doing any work.
    if not file:
        return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
    if not model_name or model_name.strip() == "":
        return "Error: Please enter a model name.", 0, gr.update(visible=False)

    try:
        # Load predictions file.
        predictions_df = pd.read_csv(file.name)

        # Validate required columns in the prediction file.
        required_columns = ['question_id', 'predicted_answer']
        missing_columns = [col for col in required_columns if col not in predictions_df.columns]
        if missing_columns:
            return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
                    0, gr.update(visible=False))

        # Load ground truth from the private HF dataset repo. Isolated in its
        # own try/except so auth/network failures produce a specific message.
        try:
            ground_truth_path = hf_hub_download(
                repo_id="SondosMB/ground-truth-dataset",
                filename="ground_truth.csv",
                repo_type="dataset",
                # `use_auth_token` is deprecated in huggingface_hub; `token`
                # is the supported spelling with identical semantics.
                token=True,
            )
            ground_truth_df = pd.read_csv(ground_truth_path)
        except Exception as e:
            return f"Error loading ground truth: {e}", 0, gr.update(visible=False)

        # Score: inner-join on question_id, normalize predictions, then count
        # exact matches. Rows whose prediction fails to parse (clean_answer ->
        # NaN) are dropped from the numerator but deliberately kept in the
        # denominator, so unparseable answers count as wrong.
        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)

        # Guard against an empty join (no overlapping question_ids).
        overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0

        # Reveal the leaderboard-submit button only after a successful run.
        return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)

    except Exception as e:
        # Catch-all boundary for the UI: surface the error instead of crashing
        # the Gradio callback.
        return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
| 720 |
|
| 721 |
|
| 722 |
|