"""Fig 10 (appendix): random-walk ensemble size vs validation F1 (diminishing returns).

Reads the F1 of the best single DW/N2V config (the maximum in small_ablation_table.csv)
and the F1 of the 5-block and 7-block ensembles (first row of each ensemble CSV).
Same base features throughout, so the comparison is fair.
"""
from pathlib import Path
import sys

import pandas as pd
import matplotlib.pyplot as plt

sys.path.insert(0, str(Path(__file__).resolve().parent))
from plot_style import apply, save, PALETTE_DEEP as C  # noqa: E402

# Activate the shared plot_style settings before any figure is created.
apply()

# Project root is two directory levels above the folder that holds this script.
ROOT = Path(__file__).resolve().parents[2]
FIG = ROOT.joinpath("reports", "figures")
RW = ROOT.joinpath("validation_runs", "dynamic_seed202", "randomwalk_systematic")


def _validation_f1(csv_name):
    """Return the ``validation_F1`` column of the CSV named *csv_name* under ``RW``."""
    return pd.read_csv(RW / csv_name).validation_F1


# Single-config baseline: the highest F1 found in the ablation table.
single = _validation_f1("small_ablation_table.csv").max()
# Ensemble scores: the first row of each ensemble result file.
e5 = _validation_f1("ensemble_5_ablation.csv").iloc[0]
e7 = _validation_f1("ensemble_7_ablation.csv").iloc[0]

# x positions are the number of random-walk configs per model; f1s holds the
# matching validation F1 values in the same order.
sizes = [1, 5, 7]
f1s = [single, e5, e7]

fig, ax = plt.subplots(figsize=(7.2, 4.6))
ax.plot(sizes, f1s, "-o", color=C[2], lw=2, markersize=9)
# Label every marker with its F1 value, nudged slightly above the point.
for s, f in zip(sizes, f1s):
    ax.text(s, f + 0.00018, f"{f:.5f}", ha="center", fontsize=9.5, fontweight="bold")
ax.set_xlabel("# random-walk embedding configs in ensemble")
ax.set_ylabel("validation F1")
ax.set_title("Random-walk ensemble size vs F1 (same base features)")
# Only the ensemble sizes that were actually evaluated get a tick.
ax.set_xticks(sizes)
# Pad the y-range around the data so the value labels stay inside the axes.
ax.set_ylim(min(f1s) - 0.0006, max(f1s) + 0.0006)
# The "+" format flag emits the sign of the delta itself, so a drop is shown as
# "-0.00012" instead of the malformed "+-0.00012" a hard-coded "+" would give.
delta = e7 - single
ax.text(4.0, min(f1s) + 0.00005,
        f"single→7-block: {delta:+.5f}\n(used 7 in the final model)", fontsize=9, color="dimgray")
# save() is the plot_style helper; it receives the figure, a base name and the output dir.
save(fig, "fig10_rw_ensemble_size", FIG)
print(f"saved fig10_rw_ensemble_size (single={single:.5f} e5={e5:.5f} e7={e7:.5f})")