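"""Generate MIB-style sample data for the dataset visualizer.

Running this script writes three CSV files (faithfulness_curves.csv,
dataset_overview.csv and metrics_table.csv) filled with seeded random values.
"""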
| | import pandas as pd |
| | import numpy as np |
| | from pathlib import Path |
| |
|
# Seed NumPy's global RNG so that repeated runs draw the same sample values.
np.random.seed(42)

# The k values at which every faithfulness curve is sampled.
K_VALUES = [
    0.001, 0.002, 0.005, 0.01, 0.02,
    0.05, 0.1, 0.2, 0.5, 1.0,
]

# Task, model and method labels used as categorical columns in the output.
TASKS = [
    "IOI",
    "MCQA",
    "Arithmetic",
    "ARC-E",
    "ARC-C",
]
MODELS = [
    "GPT-2",
    "Qwen-2.5",
    "Gemma-2",
    "Llama-3.1",
]
METHODS = ["EAP-IG (CF)", "EAP (CF)", "NAP-IG (CF)", "UGS", "IFR", "Random"]
| |
|
| | |
# Factor applied to the shared base curve for each method; any method that is
# not listed here keeps the base curve unscaled.
_METHOD_SCALE = {
    "IFR": 0.75,
    "NAP-IG (CF)": 0.85,
    "EAP (CF)": 0.92,
    "UGS": 0.88,
}


def sample_f(k: float, method: str, task: str) -> float:
    """Draw one noisy sample value ``f`` for ``method`` at ``k``.

    The value comes from NumPy's global random state, so results depend on
    how that state was seeded and on how many draws preceded this call.

    Args:
        k: Position on the curve; ``main`` passes the entries of ``K_VALUES``.
        method: Method label. ``"Random"`` gets its own flat, noisier line;
            "IFR", "NAP-IG (CF)", "EAP (CF)" and "UGS" scale the base curve
            down; every other label uses the base curve as is.
        task: Accepted for the call signature but not used in the computation.

    Returns:
        A value clipped to the closed interval [0, 1].
    """
    # This draw happens for every method, including "Random" (where the base
    # curve is then discarded), so each call consumes exactly two draws.
    curve_noise = np.random.uniform(-0.05, 0.05)
    curve = np.clip(0.1 + 0.85 * (k ** 0.6) + curve_noise, 0, 1)

    if method == "Random":
        baseline_noise = np.random.uniform(-0.08, 0.08)
        return np.clip(0.2 + 0.3 * k + baseline_noise, 0, 1)

    scaled = curve * _METHOD_SCALE.get(method, 1.0)
    jitter = np.random.uniform(-0.03, 0.03)
    return np.clip(scaled + jitter, 0, 1)
| |
|
| |
|
def main():
    """Build the three sample tables and write them out as CSV files.

    The files go into a ``data`` directory located one level above the
    directory that contains this script; the directory is created if needed.
    Random values are drawn from NumPy's global random state in a fixed order:
    first all curve samples, then a CPR/CMD pair for each metrics row.
    """
    data_dir = Path(__file__).resolve().parents[1] / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    curves_path = data_dir / "faithfulness_curves.csv"
    overview_path = data_dir / "dataset_overview.csv"
    metrics_path = data_dir / "metrics_table.csv"

    # Curves: one row per (method, task, model, k), restricted to the first
    # three tasks and the first two models.
    curve_records = [
        {"method": method, "task": task, "model": model, "k": k, "f": round(sample_f(k, method, task), 4)}
        for method in METHODS
        for task in TASKS[:3]
        for model in MODELS[:2]
        for k in K_VALUES
    ]
    pd.DataFrame(curve_records).to_csv(curves_path, index=False)

    # Dataset overview: fixed example counts per dataset, listed in the same
    # order as ``split_names``.
    split_names = ("Train", "Validation", "Test (Public)", "Test (Private)")
    counts_by_dataset = {
        "IOI": (10000, 10000, 10000, 10000),
        "MCQA": (110, 50, 50, 50),
        "Arithmetic (+)": (34400, 4920, 4920, 4920),
        "Arithmetic (-)": (17400, 2484, 2484, 2484),
        "ARC (Easy)": (2251, 570, 1188, 1188),
        "ARC (Challenge)": (1119, 299, 586, 586),
    }
    overview_records = [
        {"dataset": dataset, "split": split, "count": count}
        for dataset, counts in counts_by_dataset.items()
        for split, count in zip(split_names, counts)
    ]
    pd.DataFrame(overview_records).to_csv(overview_path, index=False)

    # Metrics: one CPR/CMD pair per (method, task, model). The "Random" method
    # draws from its own narrower ranges.
    metric_records = []
    for method in METHODS:
        is_random = method == "Random"
        cpr_bounds = (0.2, 0.35) if is_random else (0.2, 1.0)
        cmd_bounds = (0.68, 0.78) if is_random else (0.01, 0.4)
        for task in TASKS:
            for model in MODELS:
                # CPR is drawn before CMD to keep the RNG stream order fixed.
                cpr = round(np.random.uniform(*cpr_bounds), 3)
                cmd = round(np.random.uniform(*cmd_bounds), 3)
                metric_records.append(
                    {"method": method, "task": task, "model": model, "CPR": cpr, "CMD": cmd}
                )
    pd.DataFrame(metric_records).to_csv(metrics_path, index=False)

    print("Wrote:", curves_path, overview_path, metrics_path)
| |
|
| |
|
# Write the CSV files only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
| |
|