"""Fig 8 (appendix): correlation among the 8-seed LightGCN ensemble. Lower pairwise correlation => more diverse members => the seed-ensemble gains its headroom over any single model. Loads cached_scores/large_ensemble/model_lgcn_s*.npy. """ from pathlib import Path import sys, glob, re import numpy as np import matplotlib.pyplot as plt import seaborn as sns sys.path.insert(0, str(Path(__file__).resolve().parent)) from plot_style import apply, save # noqa: E402 apply() ROOT = Path(__file__).resolve().parents[2] FIG = ROOT / "reports" / "figures" LE = ROOT / "cached_scores" / "large_ensemble" paths = sorted(glob.glob(str(LE / "model_lgcn_s*.npy"))) names = [re.search(r"_s(\d+)", p).group(1) for p in paths] M = np.vstack([np.load(p).ravel() for p in paths]) corr = np.corrcoef(M) fig, ax = plt.subplots(figsize=(7.2, 6)) sns.heatmap(corr, annot=True, fmt=".3f", xticklabels=names, yticklabels=names, cmap="rocket_r", vmin=corr.min() - 0.001, vmax=1.0, square=True, cbar_kws={"label": "Pearson r"}, ax=ax) ax.set_xlabel("seed"); ax.set_ylabel("seed") mean_offdiag = (corr.sum() - corr.shape[0]) / (corr.shape[0] * (corr.shape[0] - 1)) ax.set_title(f"LightGCN ensemble pairwise correlation\n(mean off-diagonal r = {mean_offdiag:.3f} -> diverse)") save(fig, "fig8_lgcn_ensemble_correlation", FIG) print(f"saved fig8_lgcn_ensemble_correlation (members={len(names)}, mean r={mean_offdiag:.3f})")