File size: 1,917 Bytes
f28d994
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""Fig 6: cumulative F1 contribution by feature group (waterfall).

Each bar is the validation-F1 gain from adding that feature group on top of the previous
stage, measured on the fixed split_seed=202 split. The gains sum to 0.0283, exactly matching
the total improvement LightGCN 0.9386 -> final 0.9669. Sources: post95/extra_score/content_rich/
node2vec_deepwalk/randomwalk_systematic/high_order ablation tables.
"""
from pathlib import Path
import sys

import matplotlib.pyplot as plt

sys.path.insert(0, str(Path(__file__).resolve().parent))
from plot_style import apply, save, PALETTE_DEEP as C  # noqa: E402

apply()  # project-local plot_style hook; presumably installs the shared figure style
ROOT = Path(__file__).resolve().parents[2]
FIG = ROOT / "reports" / "figures"

# Validation-F1 endpoints quoted in the title. Kept as single constants so the
# title, the consistency check and the final log line cannot drift apart.
BASELINE_F1 = 0.9386
FINAL_F1 = 0.9669
EXPECTED_TOTAL = FINAL_F1 - BASELINE_F1
# The gains are tabulated to 4 decimals, so allow half a unit in the last place.
SUM_TOLERANCE = 5e-5

LABEL_OFFSET = 0.0003  # horizontal gap between a bar's end and its value label
MIN_X_MAX = 0.020      # historical right edge of the x-axis
LABEL_ROOM = 0.0025    # extra x-range reserved for the label of the longest bar

# (group label, F1 gain attributed to that group, bar colour), largest gain first.
# NOTE: the bars are drawn from zero as a plain horizontal bar chart; no running
# offset is applied, so each bar shows the group's own gain.
groups = [
    ("graph/meta-path stacking",     0.0174, C[0]),
    ("7-block random-walk ensemble", 0.0028, C[1]),
    ("DeepWalk + Node2Vec",          0.0022, C[2]),
    ("higher-order propagation",     0.0020, C[3]),
    ("BPR-MF",                       0.0017, C[4]),
    ("neg/topk/variant scores",      0.0011, C[5]),
    ("rich content (18-d)",          0.0006, C[6]),
    ("content mean-cos",             0.0005, C[7]),
]
names = [label for label, _, _ in groups]
gains = [gain for _, gain, _ in groups]
colors = [colour for _, _, colour in groups]
total = sum(gains)

# The title claims the gains add up to FINAL_F1 - BASELINE_F1. Flag a mismatch
# loudly instead of silently saving a self-contradictory figure; this is a
# warning rather than an error so the script still produces its output.
if abs(total - EXPECTED_TOTAL) > SUM_TOLERANCE:
    print(
        f"WARNING: feature-group gains sum to {total:.4f}, "
        f"but {BASELINE_F1:.4f} -> {FINAL_F1:.4f} implies {EXPECTED_TOTAL:.4f}",
        file=sys.stderr,
    )

fig, ax = plt.subplots(figsize=(10, 5.4))
ypos = range(len(groups))
ax.barh(ypos, gains, color=colors)
ax.set_yticks(list(ypos))
ax.set_yticklabels(names, fontsize=10)
ax.invert_yaxis()  # first table row (largest gain) at the top
for row, gain in enumerate(gains):
    ax.text(gain + LABEL_OFFSET, row, f"+{gain:.4f}", va="center", fontsize=9)
ax.set_xlabel("ΔF1 (cumulative contribution on split_seed=202)")
# Keep the historical 0.020 right edge, but widen it if a bar plus its label
# would otherwise be clipped after the table is edited.
ax.set_xlim(0, max(MIN_X_MAX, max(gains) + LABEL_ROOM))
ax.set_title(
    f"Feature-group contribution to F1  (LightGCN {BASELINE_F1:.4f} → "
    f"final {FINAL_F1:.4f}, ΣΔ = {total:.4f})"
)
save(fig, "fig6_feature_contribution", FIG)
print(
    f"saved fig6_feature_contribution (sum of gains = {total:.4f}, "
    f"expected {EXPECTED_TOTAL:.4f})"
)