"""Plot mean F1 against external density (p_out) from a results CSV.

The CSV is read with ``csv.DictReader``; the columns ``Method``,
``ExternalDensity`` and ``F1`` are used.  F1 values are averaged per
(method, p_out) pair and drawn with a shaded 95% confidence band.
"""
import argparse
import csv
import math
from collections import defaultdict
from statistics import mean, stdev

import matplotlib

matplotlib.use("Agg")  # backend must be selected before pyplot is imported
import matplotlib.pyplot as plt

# Display labels for the legend (same short labels as the TikZ version).
_METHOD_LABELS = {
    'Spectral': 'Spectral Clustering',
    'Densest': 'Densest Subgraph',
    'QuasiClique': 'QuasiClique',
    'k-core': 'k-core',
    'L-RMC': 'L-RMC (ours)',
}

# Two-sided 95% quantile of the standard normal distribution.
_Z_95 = 1.96


def rename_method(label: str) -> str:
    """Return the legend label for *label*, or *label* itself if unmapped."""
    return _METHOD_LABELS.get(label, label)


def load_rows(path):
    """Read the CSV at *path* and return its rows as a list of dicts.

    The file is opened with ``newline=''`` as the ``csv`` module requires,
    and decoded as ``utf-8-sig`` so that a leading byte-order mark does not
    end up glued to the first header name.
    """
    with open(path, 'r', newline='', encoding='utf-8-sig') as f:
        return list(csv.DictReader(f))


def aggregate_f1_vs_pout(rows):
    """Aggregate F1 per (method, p_out) over all other settings.

    Args:
        rows: iterable of mappings with ``Method``, ``ExternalDensity`` and
            ``F1`` entries (as produced by ``csv.DictReader``).

    Returns:
        ``defaultdict`` mapping ``method -> {p_out: (mean, ci95, n)}`` where
        ``ci95`` is the normal-approximation half-width
        ``1.96 * stdev / sqrt(n)`` (``0.0`` when ``n == 1``).

    Rows that lack a required field, hold an unparseable number, or hold a
    non-finite number are skipped.
    """
    groups = defaultdict(list)
    for row in rows:
        try:
            method = row['Method']
            pout = float(row['ExternalDensity'])
            f1 = float(row['F1'])
        except (KeyError, TypeError, ValueError):
            # TypeError: DictReader fills the missing fields of a short row
            # with None, and float(None) raises TypeError, not ValueError.
            continue
        if method is None:
            continue
        if not (math.isfinite(pout) and math.isfinite(f1)):
            # A NaN F1 would poison the group mean, and a NaN p_out cannot
            # be sorted into an x position.
            continue
        groups[(method, pout)].append(f1)

    summary = defaultdict(dict)  # method -> pout -> (mean, ci95, n)
    for (method, pout), vals in groups.items():
        n = len(vals)  # always >= 1: a group exists only once it has a value
        spread = stdev(vals) if n > 1 else 0.0
        summary[method][pout] = (mean(vals), _Z_95 * spread / math.sqrt(n), n)
    return summary


def plot(summary, out_path):
    """Draw one F1-vs-p_out line per method and save the figure.

    Args:
        summary: mapping as returned by ``aggregate_f1_vs_pout``.
        out_path: destination passed to ``Figure.savefig``.

    A method with no data at some p_out gets a NaN there, which leaves a
    gap in its line.  The figure is closed before returning so repeated
    calls do not accumulate open figures.
    """
    pouts = sorted({p for per_pout in summary.values() for p in per_pout})
    methods = sorted(summary)
    missing = (float('nan'), 0.0, 0)

    fig, ax = plt.subplots(figsize=(6.0, 3.4), dpi=200)
    try:
        for method in methods:
            stats = [summary[method].get(p, missing) for p in pouts]
            means = [mu for mu, _, _ in stats]
            lower = [mu - half for mu, half, _ in stats]
            upper = [mu + half for mu, half, _ in stats]

            (line,) = ax.plot(
                pouts, means, marker='o', linewidth=2, markersize=4,
                label=rename_method(method),
            )
            # Shaded 95% CI, explicitly tied to the colour of its line.
            ax.fill_between(
                pouts, lower, upper, alpha=0.15, facecolor=line.get_color(),
            )

        # NOTE(review): \text inside mathtext is only understood by recent
        # Matplotlib releases - confirm the minimum supported version.
        ax.set_xlabel(r"$p_{\text{out}}$")
        ax.set_ylabel("F1")
        ax.set_title("F1 vs $p_{\\text{out}}$ (avg over $k,p_{\\text{in}}$)")
        ax.grid(True, alpha=0.3)
        if methods:
            # Skip the legend when there is nothing to label.
            ax.legend(fontsize=7, frameon=False)
        fig.tight_layout()
        fig.savefig(out_path, bbox_inches='tight')
    finally:
        plt.close(fig)
    print(f"Saved {out_path}")


def main(argv=None):
    """Command-line entry point: read the CSV, aggregate, and plot.

    Args:
        argv: argument list; ``None`` makes argparse use ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('csv', help='Path to Table1 CSV (per-setting averages)')
    ap.add_argument('--out', default='appendix_f1_vs_pout.png',
                    help='Output PNG path')
    args = ap.parse_args(argv)

    rows = load_rows(args.csv)
    summary = aggregate_f1_vs_pout(rows)
    plot(summary, args.out)


if __name__ == '__main__':
    main()