File size: 2,547 Bytes
f28d994
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Fig 3 (hero figure): method performance progression — the 'three-step leap'.

Validation F1 on the fixed split_seed=202 split; public-leaderboard F1 from the README
stage table. Sources for each point are noted inline.
"""
from pathlib import Path
import sys

import numpy as np
import matplotlib.pyplot as plt

sys.path.insert(0, str(Path(__file__).resolve().parent))
from plot_style import apply, save, PALETTE_DEEP as C  # noqa: E402

# Call the style hook imported from the local plot_style module before any
# figure is created (presumably sets the shared matplotlib look — confirm
# against plot_style).
apply()

# ROOT is two directory levels above the folder that contains this script;
# figures are written beneath ROOT/reports/figures.
_HERE = Path(__file__).resolve()
ROOT = _HERE.parents[2]
FIG = ROOT.joinpath("reports", "figures")

# Ordered progression of methods. Every row is a triple:
#   (x-tick label, validation F1, public-leaderboard F1 or None when no LB score is given)
# The trailing comment on each row names where the numbers were taken from.
stages = [
    ("Official\nbaseline", 0.885, None),  # notebook baseline (exp. history)
    ("LightGCN\n(single)", 0.9386, 0.9304),  # dynamic_summary.csv / 6-model ens LB
    ("+ graph/meta-path\nstacking", 0.9560, 0.9576),  # post95 ablation_table baseline_stacking
    ("+ content\n+ BPR-MF", 0.9593, 0.9600),  # extra_score_ablation +bpr_mf
    ("+ rich content", 0.9599, None),  # content_rich_ablation
    ("+ DeepWalk\n+ Node2Vec", 0.9621, 0.9625),  # node2vec_deepwalk_ablation +node2vec
    ("+ 7-block RW\nensemble", 0.9649, None),  # ensemble_7_ablation
    ("+ high-order\npropagation", 0.9669, 0.9663),  # high_order validation_summary (final) / LB
]

# Transpose the table into one list per column.
labels, _val_f1, _lb_f1 = (list(column) for column in zip(*stages))
val = np.array(_val_f1)

# Leaderboard series: keep only the stages that have a score, together with
# their position on the x axis, then split into x and y lists.
_lb_points = [(pos, score) for pos, score in enumerate(_lb_f1) if score is not None]
lb_x = [pos for pos, _ in _lb_points]
lb_y = [score for _, score in _lb_points]

# Draw the progression figure: a solid line with the validation F1 of every
# stage, and a dashed line through the stages that have a leaderboard score.
fig, ax = plt.subplots(figsize=(11.5, 5.4))
x = np.arange(len(stages))
ax.plot(x, val, "-o", color=C[0], label="Validation F1", zorder=3, markersize=7)
ax.plot(lb_x, lb_y, "--s", color=C[3], label="Public leaderboard F1", zorder=3, markersize=6)

# Print each validation score (3 decimals) just above its marker; the offset
# is expressed in points, not data units.
for i, v in enumerate(val):
    ax.annotate(f"{v:.3f}", (i, v), textcoords="offset points", xytext=(0, 9),
                ha="center", fontsize=8.6)

# Three breakthrough brackets: (from-stage index, to-stage index, caption).
# The gain printed on each bracket is derived from `val` rather than typed in
# by hand, so it cannot drift out of sync when the stage table is edited.
leap_spans = [(1, 2, "stacking"), (5, 6, "random-walk"), (6, 7, "high-order")]
leaps = [(a, b, f"{val[b] - val[a]:+.3f}\n{tag}") for a, b, tag in leap_spans]
y0 = 0.978  # bracket height in data coordinates, above every plotted score
for a, b, txt in leaps:
    # Empty-text annotation: only the arrow from stage a to stage b is drawn.
    ax.annotate("", xy=(b, y0), xytext=(a, y0),
                arrowprops=dict(arrowstyle="-|>", color=C[2], lw=1.6))
    # Caption centred over the arrow, slightly above it.
    ax.text((a + b) / 2, y0 + 0.0015, txt, ha="center", fontsize=8, color=C[2])

ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize=8.6)
ax.set_ylabel("F1 score")
ax.set_ylim(0.86, 0.99)  # leaves headroom above y0 for the bracket captions
ax.set_title("Performance progression: three breakthroughs (validation split seed = 202)")
ax.legend(loc="lower right")
# `save` comes from the local plot_style module; presumably it writes the
# figure under FIG using the given stem — confirm against plot_style.
save(fig, "fig3_progression", FIG)
print("saved fig3_progression")