|
|
| import os
|
| import json
|
| import re
|
| import pandas as pd
|
|
|
|
|
# Directories holding the per-benchmark evaluation result JSON files.
# Each directory is scanned for files named
# scores_<model>_noise_<rate>_passage_<n>_num_queries_<q>.json.
Noise_Robustness_DIR = "results/Noise Robustness/"

Negative_Rejection_DIR = "results/Negative Rejection/"

Counterfactual_Robustness_DIR = "results/Counterfactual Robustness/"

# NOTE(review): "Infomration" is a typo in the constant name; renaming would
# break any external references, so fix it in a coordinated change.
Infomration_Integration_DIR = "results/Information Integration/"
|
|
|
|
|
|
|
def load_scores_common(file_dir, config):
    """Load per-model accuracy scores from *file_dir* into a table.

    Scans *file_dir* for JSON result files named
    ``scores_<model>_noise_<rate>_passage_<n>_num_queries_<q>.json`` and
    builds one row per model with the accuracy (as a percentage string)
    at each fixed noise rate.

    Args:
        file_dir: Directory containing the score JSON files.
        config: Dict with keys ``models`` (list of model name strings),
            ``passage_num`` and ``num_queries`` (matched against filenames).

    Returns:
        pandas.DataFrame with a ``Model`` column plus one column per noise
        rate ("0.2" ... "1.0"). Missing or non-numeric scores appear as
        "N/A". If *file_dir* does not exist, an empty DataFrame with the
        same columns is returned.
    """
    fixed_noise_rates = ["0.2", "0.4", "0.6", "0.8", "1.0"]

    if not os.path.exists(file_dir):
        return pd.DataFrame(columns=["Model", *fixed_noise_rates])

    score_data = {}

    for model in config["models"]:
        pattern = re.compile(
            rf"^scores_{re.escape(model)}_noise_(?P<noise_rate>[\d.]+)_"
            rf"passage_{re.escape(str(config['passage_num']))}_num_queries_{re.escape(str(config['num_queries']))}\.json$"
        )

        model_scores = {rate: "N/A" for rate in fixed_noise_rates}

        for filename in os.listdir(file_dir):
            match = pattern.match(filename)
            if not match:
                continue
            noise_rate = match.group("noise_rate")
            if noise_rate not in fixed_noise_rates:
                continue

            filepath = os.path.join(file_dir, filename)
            with open(filepath, "r") as f:
                score = json.load(f)

            accuracy = score.get("accuracy")
            # BUG FIX: the original did score.get("accuracy", "N/A") and then
            # formatted f"{accuracy * 100:.2f}" unconditionally — when the key
            # was missing, "N/A" * 100 is a 500-char string and the :.2f spec
            # raises ValueError. Only format genuinely numeric values.
            if isinstance(accuracy, (int, float)):
                model_scores[noise_rate] = f"{accuracy * 100:.2f}"

        score_data[model] = model_scores

    return pd.DataFrame(
        [{"Model": model, **score_data[model]} for model in config["models"]]
    )
|
|
|
|
|
def load_negative_rejection_scores(config):
    """Load per-model rejection rates (noise rate fixed at 1.0) into a table.

    Looks up, for each configured model, the expected score file inside
    ``Negative_Rejection_DIR`` and extracts its ``reject_rate``.

    Args:
        config: Dict with keys ``models`` (list of model name strings),
            ``passage_num`` and ``num_queries`` (used to build filenames).

    Returns:
        pandas.DataFrame with columns ``Model`` and ``Rejection Rate %``.
        Missing files or missing ``reject_rate`` keys appear as "N/A".
    """
    # BUG FIX: the original had two consecutive existence checks on the same
    # directory; the first returned a column-less DataFrame and made the
    # second (correctly typed) branch unreachable. Keep only the typed one.
    if not os.path.exists(Negative_Rejection_DIR):
        return pd.DataFrame(columns=["Model", "Rejection Rate %"])

    score_data = {}

    for model in config["models"]:
        expected_filename = f"scores_{model}_noise_1.0_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Negative_Rejection_DIR, expected_filename)

        if not os.path.exists(filepath):
            score_data[model] = "N/A"
            continue

        with open(filepath, "r") as f:
            score = json.load(f)
        reject_rate = score.get("reject_rate", "N/A")
        # CONSISTENCY FIX: format with two decimals like the other score
        # tables (the original emitted an unformatted repr, e.g. "50.0").
        score_data[model] = (
            f"{reject_rate * 100:.2f}" if reject_rate != "N/A" else "N/A"
        )

    return pd.DataFrame(
        [{"Model": model, "Rejection Rate %": score_data[model]} for model in config["models"]]
    )
|
|
|
def load_counterfactual_robustness_scores(config):
    """Load and format counterfactual robustness scores into a table.

    For each configured model, reads the score file (noise rate fixed at
    0.4) from ``Counterfactual_Robustness_DIR`` and extracts metrics from
    its "factual_only" and "counterfactual" condition entries.

    Args:
        config: Dict with keys ``models`` (list of model name strings),
            ``passage_num`` and ``num_queries`` (used to build filenames).
            The dict is NOT modified.

    Returns:
        pandas.DataFrame with columns ``Model``, ``Accuracy (%)``,
        ``Acc_doc (%)``, ``Error Detection Rate (%)`` and
        ``Correction Rate (%)``; missing data appears as "N/A".
    """
    columns = ["Model", "Accuracy (%)", "Acc_doc (%)",
               "Error Detection Rate (%)", "Correction Rate (%)"]

    # BUG FIX: the original wrote config['noise_rate'] = 0.4, silently
    # mutating the caller's dict as a side effect. Use a local instead.
    noise_rate = 0.4

    if not os.path.exists(Counterfactual_Robustness_DIR):
        return pd.DataFrame(columns=columns)

    rows = []

    for model in config["models"]:
        expected_filename = f"scores_{model}_noise_{noise_rate}_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Counterfactual_Robustness_DIR, expected_filename)

        # Start every row fully "N/A" and overwrite with real values below.
        row = dict.fromkeys(columns, "N/A")
        row["Model"] = model

        if os.path.exists(filepath):
            with open(filepath, "r") as f:
                scores_json = json.load(f)
            conditions = scores_json["conditions"]
            factual = next((s for s in conditions
                            if s["condition_label"] == "factual_only"), {})
            counterfactual = next((s for s in conditions
                                   if s["condition_label"] == "counterfactual"), {})

            # Percentages: whole numbers for the rate metrics, two decimals
            # for the correction rate (matches the original formatting).
            if factual:
                row["Accuracy (%)"] = f"{int(round(factual.get('all_rate', 0) * 100))}"
            if counterfactual:
                row["Acc_doc (%)"] = f"{int(round(counterfactual.get('all_rate', 0) * 100))}"
                row["Error Detection Rate (%)"] = f"{int(round(counterfactual.get('reject_rate', 0) * 100))}"
                row["Correction Rate (%)"] = f"{round(counterfactual.get('correct_rate', 0) * 100, 2):.2f}"

        rows.append(row)

    return pd.DataFrame(rows, columns=columns)
|
|
|