cs3319-project2 / code /figures /fig8_lgcn_ensemble_correlation.py
NLP-beginner's picture
CS3319 Project 2 final deliverable (public F1 = 0.96626)
f28d994
Raw
History Blame Contribute Delete
1.43 kB
"""Fig 8 (appendix): correlation among the 8-seed LightGCN ensemble.
Lower pairwise correlation => more diverse members => the seed-ensemble gains
its headroom over any single model. Loads cached_scores/large_ensemble/model_lgcn_s*.npy.
"""
from pathlib import Path
import sys, glob, re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sys.path.insert(0, str(Path(__file__).resolve().parent))
from plot_style import apply, save # noqa: E402
apply()

# This file lives two directory levels below the project root.
ROOT = Path(__file__).resolve().parents[2]
FIG = ROOT / "reports" / "figures"
LE = ROOT / "cached_scores" / "large_ensemble"

# Pair each cached score file with the seed encoded in its *file name*.
# Matching on the name only (not the full path) keeps an "_s<digits>" fragment
# in a parent directory from being mistaken for the seed.
_seed_re = re.compile(r"_s(\d+)")
_members = []
for _path in glob.glob(str(LE / "model_lgcn_s*.npy")):
    _match = _seed_re.search(Path(_path).name)
    if _match is not None:  # ignore glob hits that carry no numeric seed
        _members.append((_match.group(1), _path))

# Order by numeric seed (path as tie-breaker) so that e.g. seed 10 follows
# seed 9 instead of sorting between 1 and 2 as a plain string sort would do.
_members.sort(key=lambda member: (int(member[0]), member[1]))

# A pairwise correlation matrix needs at least two members; fail with a clear
# message rather than an opaque vstack / indexing error further down.
if len(_members) < 2:
    raise RuntimeError(
        f"need at least 2 'model_lgcn_s<seed>.npy' files in {LE}, "
        f"found {len(_members)}"
    )

names = [seed for seed, _ in _members]
paths = [path for _, path in _members]

# One flattened score vector per row; corrcoef treats each row as a variable.
M = np.vstack([np.load(p).ravel() for p in paths])
corr = np.corrcoef(M)

fig, ax = plt.subplots(figsize=(7.2, 6))
sns.heatmap(
    corr,
    annot=True,
    fmt=".3f",
    xticklabels=names,
    yticklabels=names,
    cmap="rocket_r",
    # Start the colour scale just below the smallest observed correlation so
    # the small differences between members stay visible.
    vmin=corr.min() - 0.001,
    vmax=1.0,
    square=True,
    cbar_kws={"label": "Pearson r"},
    ax=ax,
)
ax.set_xlabel("seed")
ax.set_ylabel("seed")

# Mean over the off-diagonal entries only (the diagonal is each member's
# correlation with itself and would inflate the average).
_off_diagonal = ~np.eye(corr.shape[0], dtype=bool)
mean_offdiag = corr[_off_diagonal].mean()

# NOTE: the "-> diverse" suffix is printed unconditionally; it is not derived
# from the value of mean_offdiag.
ax.set_title(f"LightGCN ensemble pairwise correlation\n(mean off-diagonal r = {mean_offdiag:.3f} -> diverse)")
save(fig, "fig8_lgcn_ensemble_correlation", FIG)
print(f"saved fig8_lgcn_ensemble_correlation (members={len(names)}, mean r={mean_offdiag:.3f})")