| """Fig 7: error analysis & decision-rule robustness. |
| |
| (a) F1 by author/paper degree bucket -> cold-start error structure. |
| (b) calibration curve of the final model. |
| (c) rank-cutoff ratio vs F1 (with min-max band) vs probability-threshold drift. |
| """ |
| from pathlib import Path |
| import sys |
|
|
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| from sklearn.calibration import calibration_curve |
|
|
| sys.path.insert(0, str(Path(__file__).resolve().parent)) |
| from plot_style import apply, save, PALETTE_DEEP as C |
|
|
# Activate the shared plotting style from plot_style before any figure exists.
apply()

# All locations are derived from this script's own path: ROOT sits two
# directories above the folder that contains this file.
ROOT = Path(__file__).resolve().parents[2]
FIG = ROOT.joinpath("reports", "figures")
VR = ROOT.joinpath("validation_runs", "dynamic_seed202")

# Validation labels (cast to int) and the final model's scores (cast to float).
# NOTE(review): the file name suggests out-of-fold predictions -- confirm.
y = np.load(VR / "val_labels_seed202.npy").astype(int)
final_oof = np.load(
    VR / "high_order_graph_stack/rich_rw7_highorder_directed_oof.npy"
).astype(float)

# A single row of three panels, filled in by the statements further down.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16.5, 4.8))

# Per-bucket table read by plot_buckets (it filters on "bucket_type").
buckets = pd.read_csv(VR / "error_group_calibration/error_analysis_buckets.csv")
|
|
|
|
def plot_buckets(ax, btype, color, title):
    """Draw per-bucket F1 bars for one bucket type on *ax*.

    Rows are selected from the module-level ``buckets`` table where the
    ``bucket_type`` column equals *btype*. One bar is drawn per selected row,
    in table order, with the row's ``bucket`` value as its tick label.

    Args:
        ax: Matplotlib axes to draw on.
        btype: ``bucket_type`` value to select.
        color: Bar colour forwarded to ``ax.bar``.
        title: Title set on *ax*.
    """
    rows = buckets.loc[buckets["bucket_type"] == btype].reset_index(drop=True)
    scores = rows["f1"]
    slots = range(len(rows))

    ax.bar(slots, scores, color=color, alpha=0.85)
    ax.set_xticks(slots)
    ax.set_xticklabels(rows["bucket"], rotation=35, ha="right", fontsize=8)

    # Print each F1 value just above the top of its bar.
    for slot, score in zip(slots, scores):
        ax.text(slot, score + 0.01, f"{score:.2f}", ha="center", fontsize=8)

    # The upper limit leaves headroom for the value labels of bars near 1.0.
    ax.set_ylabel("F1")
    ax.set_ylim(0, 1.05)
    ax.set_title(title)
|
|
|
|
# Panel (a): F1 per author-degree bucket, drawn on the first axes.
plot_buckets(
    ax=axes[0],
    btype="author_degree",
    color=C[0],
    title="(a) F1 by author degree\n(cold-start authors hardest)",
)
|
|
| |
# Panel (b): calibration curve of the final model, using 10 quantile-based bins.
frac_pos, mean_pred = calibration_curve(
    y,
    final_oof,
    n_bins=10,
    strategy="quantile",
)

ax_cal = axes[1]
# Dashed diagonal = reference line where predicted probability equals the
# observed fraction of positives.
ax_cal.plot([0, 1], [0, 1], "k--", lw=1, label="perfect")
ax_cal.plot(mean_pred, frac_pos, "s-", color=C[2], label="final model")
ax_cal.set(
    xlabel="Mean predicted probability",
    ylabel="Fraction of positives",
    xlim=(0, 1),
    ylim=(0, 1),
    title="(b) Calibration (final model)",
)
ax_cal.legend(fontsize=9)
|
|
| |
# Panel (c): F1 as a function of the rank-cutoff positive ratio.
ratio = pd.read_csv(ROOT / "validation_runs" / "stack_ratio_analysis.csv")
ratio = ratio.sort_values("ratio")

ax_ratio = axes[2]
ax_ratio.plot(
    ratio["ratio"],
    ratio["f1_mean"],
    "-o",
    color=C[0],
    label="rank-cutoff F1 (mean)",
)
ax_ratio.fill_between(
    ratio["ratio"],
    ratio["f1_min"],
    ratio["f1_max"],
    color=C[0],
    alpha=0.18,
    label="min–max band",
)

# Dashed vertical marker at ratio 0.52; its caption sits just to the right of
# the line and slightly above the lowest mean F1 in the table.
ax_ratio.axvline(0.52, color=C[3], ls="--", lw=1.5)
ax_ratio.text(
    0.521,
    ratio["f1_mean"].min() + 0.0005,
    "prob-threshold\ndrifts to 0.52",
    color=C[3],
    fontsize=8,
)

ax_ratio.set_xlabel("Positive ratio (rank cutoff)")
ax_ratio.set_ylabel("F1")
ax_ratio.set_title("(c) Rank-cutoff stability")
ax_ratio.legend(fontsize=8.5, loc="lower left")
|
|
# Overall title; y=1.02 requests a position slightly above the default one.
fig.suptitle(
    "Error analysis & decision-rule robustness",
    y=1.02,
)

# Hand the figure to the shared ``save`` helper from plot_style (target
# directory FIG), then report completion on stdout.
save(fig, "fig7_error_analysis", FIG)
print("saved fig7_error_analysis")
|
|