"""Fig 10 (appendix): random-walk ensemble size vs validation F1 (diminishing returns). Reads the best single DW/N2V config (small_ablation_table.csv), the 5-block ensemble and the 7-block ensemble F1. Same base features throughout, so the comparison is fair. """ from pathlib import Path import sys import pandas as pd import matplotlib.pyplot as plt sys.path.insert(0, str(Path(__file__).resolve().parent)) from plot_style import apply, save, PALETTE_DEEP as C # noqa: E402 apply() ROOT = Path(__file__).resolve().parents[2] FIG = ROOT / "reports" / "figures" RW = ROOT / "validation_runs" / "dynamic_seed202" / "randomwalk_systematic" single = pd.read_csv(RW / "small_ablation_table.csv").validation_F1.max() e5 = pd.read_csv(RW / "ensemble_5_ablation.csv").validation_F1.iloc[0] e7 = pd.read_csv(RW / "ensemble_7_ablation.csv").validation_F1.iloc[0] sizes = [1, 5, 7] f1s = [single, e5, e7] fig, ax = plt.subplots(figsize=(7.2, 4.6)) ax.plot(sizes, f1s, "-o", color=C[2], lw=2, markersize=9) for s, f in zip(sizes, f1s): ax.text(s, f + 0.00018, f"{f:.5f}", ha="center", fontsize=9.5, fontweight="bold") ax.set_xlabel("# random-walk embedding configs in ensemble") ax.set_ylabel("validation F1") ax.set_title("Random-walk ensemble size vs F1 (same base features)") ax.set_xticks(sizes) ax.set_ylim(min(f1s) - 0.0006, max(f1s) + 0.0006) ax.text(4.0, min(f1s) + 0.00005, f"single→7-block: +{e7-single:.5f}\n(used 7 in the final model)", fontsize=9, color="dimgray") save(fig, "fig10_rw_ensemble_size", FIG) print(f"saved fig10_rw_ensemble_size (single={single:.5f} e5={e5:.5f} e7={e7:.5f})")