# cs3319-project2/code/figures/fig6_feature_contribution.py
# CS3319 Project 2 final deliverable (public F1 = 0.96626), commit f28d994
"""Fig 6: cumulative F1 contribution by feature group (waterfall).
Each bar is the validation-F1 gain from adding that feature group on top of the previous
stage, measured on the fixed split_seed=202 split. Bars are drawn from zero and listed by
decreasing gain rather than stacked end to end. The gains sum to 0.0283, matching the total
improvement from LightGCN (0.9386) to the final model (0.9669). Sources: post95/extra_score/
content_rich/node2vec_deepwalk/randomwalk_systematic/high_order ablation tables.
"""
from pathlib import Path
import sys
import matplotlib.pyplot as plt
sys.path.insert(0, str(Path(__file__).resolve().parent))
from plot_style import apply, save, PALETTE_DEEP as C # noqa: E402
# Script body: draw one horizontal bar per feature group showing its validation-F1 gain,
# annotate each bar with its value, and hand the figure to plot_style.save.
apply()  # plot_style.apply; presumably installs the shared figure style -- confirm in plot_style

ROOT = Path(__file__).resolve().parents[2]
FIG = ROOT / "reports" / "figures"

# Headline validation F1 before and after all feature groups are added. Kept as named
# constants so the title, the sanity check and the status line cannot drift apart.
BASELINE_F1 = 0.9386  # LightGCN
FINAL_F1 = 0.9669  # final model
EXPECTED_TOTAL_GAIN = round(FINAL_F1 - BASELINE_F1, 4)

LABEL_OFFSET = 0.0003  # horizontal gap between the end of a bar and its value label
MIN_X_MAX = 0.020  # x-axis upper limit used whenever every bar fits inside it

# (group label, F1 gain from adding that group, bar colour), listed largest gain first.
groups = [
    ("graph/meta-path stacking", 0.0174, C[0]),
    ("7-block random-walk ensemble", 0.0028, C[1]),
    ("DeepWalk + Node2Vec", 0.0022, C[2]),
    ("higher-order propagation", 0.0020, C[3]),
    ("BPR-MF", 0.0017, C[4]),
    ("neg/topk/variant scores", 0.0011, C[5]),
    ("rich content (18-d)", 0.0006, C[6]),
    ("content mean-cos", 0.0005, C[7]),
]
names = [name for name, _, _ in groups]
gains = [gain for _, gain, _ in groups]
colors = [color for _, _, color in groups]
total = sum(gains)

fig, ax = plt.subplots(figsize=(10, 5.4))
ypos = range(len(groups))
ax.barh(ypos, gains, color=colors)
ax.set_yticks(list(ypos))
ax.set_yticklabels(names, fontsize=10)
ax.invert_yaxis()  # first entry of `groups` ends up at the top of the chart

for row, gain in enumerate(gains):
    ax.text(gain + LABEL_OFFSET, row, f"+{gain:.4f}", va="center", fontsize=9)

ax.set_xlabel("ΔF1 (cumulative contribution on split_seed=202)")
# Keep the historical 0.020 limit for the current numbers, but widen it instead of clipping
# a bar (and its label) if a gain in the table above is ever raised.
ax.set_xlim(0, max(MIN_X_MAX, max(gains) * 1.12))
ax.set_title(
    f"Feature-group contribution to F1 (LightGCN {BASELINE_F1:.4f} → final {FINAL_F1:.4f}, "
    f"ΣΔ = {total:.4f})"
)

save(fig, "fig6_feature_contribution", FIG)  # plot_style.save; output location/format defined there

# The gains are quoted to 4 decimals, so anything at or beyond half a unit in the last place
# means the table no longer adds up to the headline improvement.
if abs(total - EXPECTED_TOTAL_GAIN) >= 5e-5:
    print(
        f"WARNING: feature-group gains sum to {total:.4f}, "
        f"but {FINAL_F1:.4f} - {BASELINE_F1:.4f} = {EXPECTED_TOTAL_GAIN:.4f}",
        file=sys.stderr,
    )
print(
    f"saved fig6_feature_contribution (sum of gains = {total:.4f}, "
    f"expected {EXPECTED_TOTAL_GAIN:.4f})"
)