"""Fig 6: cumulative F1 contribution by feature group (waterfall). Each bar is the validation-F1 gain from adding that feature group on top of the previous stage, measured on the fixed split_seed=202 split. The gains sum to 0.0283, exactly matching the total improvement LightGCN 0.9386 -> final 0.9669. Sources: post95/extra_score/content_rich/ node2vec_deepwalk/randomwalk_systematic/high_order ablation tables. """ from pathlib import Path import sys import matplotlib.pyplot as plt sys.path.insert(0, str(Path(__file__).resolve().parent)) from plot_style import apply, save, PALETTE_DEEP as C # noqa: E402 apply() ROOT = Path(__file__).resolve().parents[2] FIG = ROOT / "reports" / "figures" # (group, cumulative-F1-gain), sorted descending for readability groups = [ ("graph/meta-path stacking", 0.0174, C[0]), ("7-block random-walk ensemble", 0.0028, C[1]), ("DeepWalk + Node2Vec", 0.0022, C[2]), ("higher-order propagation", 0.0020, C[3]), ("BPR-MF", 0.0017, C[4]), ("neg/topk/variant scores", 0.0011, C[5]), ("rich content (18-d)", 0.0006, C[6]), ("content mean-cos", 0.0005, C[7]), ] names = [g[0] for g in groups] gains = [g[1] for g in groups] colors = [g[2] for g in groups] total = sum(gains) fig, ax = plt.subplots(figsize=(10, 5.4)) ypos = range(len(groups)) ax.barh(ypos, gains, color=colors) ax.set_yticks(list(ypos)) ax.set_yticklabels(names, fontsize=10) ax.invert_yaxis() for i, g in enumerate(gains): ax.text(g + 0.0003, i, f"+{g:.4f}", va="center", fontsize=9) ax.set_xlabel("ΔF1 (cumulative contribution on split_seed=202)") ax.set_xlim(0, 0.020) ax.set_title(f"Feature-group contribution to F1 (LightGCN 0.9386 → final 0.9669, ΣΔ = {total:.4f})") save(fig, "fig6_feature_contribution", FIG) print(f"saved fig6_feature_contribution (sum of gains = {total:.4f}, expected 0.0283)")