import csv
import math
from collections import defaultdict
from statistics import mean, stdev
from typing import Dict, Iterable, List, Mapping, Sequence, Tuple

# Internal method keys in the order their series are drawn. Methods that are
# absent from the data are dropped; methods not listed here are never plotted.
_METHOD_ORDER = ('L-RMC', 'QuasiClique', 'Spectral', 'Densest', 'k-core')

# Series colours, assigned by position among the methods actually plotted.
_SERIES_COLORS = (
    'blue!80!black',
    'orange!80!black',
    'green!60!black',
    'red!80!black',
    'purple!70!black',
)

# Internal method key -> short legend label used in the paper.
_LEGEND_LABELS = {
    'Spectral': 'Spectral Clustering',
    'Densest': 'Densest Subgraph',
    'QuasiClique': 'QuasiClique',
    'k-core': 'k-core',
    'L-RMC': 'L-RMC (ours)',
}


def rename_method(label: str) -> str:
    """Map internal method keys to the short legend labels we want in the paper.

    Unknown keys are returned unchanged.
    """
    return _LEGEND_LABELS.get(label, label)


def load_rows(path: str) -> List[Dict[str, str]]:
    """Read the CSV at *path* and return one dict per data row.

    The file is opened with ``newline=''`` as the ``csv`` module requires, so
    line breaks embedded in quoted fields reach the parser untouched.
    """
    with open(path, 'r', newline='') as f:
        return list(csv.DictReader(f))


def aggregate_f1_vs_pout(rows: Iterable[Mapping[str, str]]):
    """Summarise F1 per (method, p_out), pooling over every other setting.

    Args:
        rows: Mappings with at least the keys ``'Method'``,
            ``'ExternalDensity'`` (p_out) and ``'F1'``.

    Returns:
        A ``defaultdict`` mapping ``method -> {p_out: (mean, ci95, n)}`` where
        ``ci95`` is the half-width ``1.96 * stdev / sqrt(n)`` (``0.0`` when a
        group holds a single value) and ``n`` is the number of pooled values.

    Rows whose ``ExternalDensity`` or ``F1`` is missing, ``None`` or not
    parseable as a float are skipped. A missing ``'Method'`` key still raises
    ``KeyError``.
    """
    samples = defaultdict(list)
    for row in rows:
        method = row['Method']
        try:
            pout = float(row['ExternalDensity'])
            f1 = float(row['F1'])
        except (KeyError, TypeError, ValueError):
            # TypeError covers the None that csv.DictReader substitutes for
            # fields missing from a short row.
            continue
        samples[(method, pout)].append(f1)

    summary = defaultdict(dict)  # method -> pout -> (mean, ci95, n)
    for (method, pout), vals in samples.items():
        n = len(vals)  # always >= 1: a group only exists once a value is added
        spread = stdev(vals) if n > 1 else 0.0
        summary[method][pout] = (mean(vals), 1.96 * spread / math.sqrt(n), n)
    return summary


def _format_coordinates(points: Sequence[Tuple[float, float]]) -> str:
    """Return one pgfplots ``coordinates {...};`` line for *points*."""
    body = ''.join(f" ({x:.2f},{y:.3f})" for x, y in points)
    return " coordinates {" + body + "};\n"


def emit_tikz(summary, out_path: str) -> None:
    """Write a pgfplots figure of mean F1 vs p_out with shaded 95% CI bands.

    Args:
        summary: ``method -> {p_out: (mean, ci95, n)}`` as produced by
            ``aggregate_f1_vs_pout``.
        out_path: Destination file; it is overwritten.

    Only the methods named in ``_METHOD_ORDER`` are drawn, in that order.
    Each one contributes a mean line with a legend entry, two invisible named
    paths (mean +/- ci95) and a ``fill between`` that shades the band.
    A confirmation line is printed once the file has been written.
    """
    pouts = sorted({p for per_method in summary.values() for p in per_method})
    methods = [m for m in _METHOD_ORDER if m in summary]

    parts = [
        '% Auto-generated by make_appendix_f1_vs_pout_tikz.py\n',
        '\\begin{figure}[h]\n',
        ' \\centering\n',
        ' \\begin{tikzpicture}\n',
        ' \\begin{axis}[\n',
        ' width=0.9\\linewidth, height=5cm,\n',
        ' xlabel={$p_{\\text{out}}$}, ylabel={F1},\n',
        ' xmin=0.22, xmax=0.48, ymin=0, ymax=1.02,\n',
        ' grid=both,\n',
        ' legend to name=f1legend,\n',
        ' legend columns=3,\n',
        ' legend cell align=left,\n',
        ' legend image post style={xscale=1.2},\n',
        ' legend style={draw=none, fill=none, font=\\scriptsize, '
        '/tikz/every even column/.append style={column sep=0.75em}}\n',
        ' ]\n\n',
    ]

    for idx, method in enumerate(methods):
        label = rename_method(method)
        color = _SERIES_COLORS[idx % len(_SERIES_COLORS)]
        stats = summary[method]

        # A series only gets points at the p_out values it has data for.
        present = [p for p in pouts if p in stats]
        coords = [(p, stats[p][0]) for p in present]
        upper = [(p, stats[p][0] + stats[p][1]) for p in present]
        lower = [(p, stats[p][0] - stats[p][1]) for p in present]

        # The method key with '-' and ' ' removed, used as the path-name suffix.
        name = method.replace('-', '').replace(' ', '')

        parts.extend([
            # Series comment for readability of the generated file.
            f" % --- {label}\n",
            # Mean line.
            f" \\addplot+[mark=o, thick, draw={color}]\n",
            _format_coordinates(coords),
            f" \\addlegendentry{{{label}}}\n",
            # Invisible upper and lower paths bounding the shaded band.
            f" \\addplot[name path=upper{name}, draw=none, forget plot]\n",
            _format_coordinates(upper),
            f" \\addplot[name path=lower{name}, draw=none, forget plot]\n",
            _format_coordinates(lower),
            # Shade the region between the two named paths.
            f" \\addplot[fill={color}, fill opacity=0.30, draw=none, forget plot]\n",
            f" fill between [of=upper{name} and lower{name}];\n\n",
        ])

    parts.extend([
        ' \\end{axis}\n',
        ' \\end{tikzpicture}\n\n',
        ' % Place the short legend collected above\n',
        ' \\pgfplotslegendfromname{f1legend}\n\n',
        ' \\caption{F1 vs $p_{\\text{out}}$ (averaged over $k$ and $p_{\\text{in}}$). '
        'Shaded bands show 95\\% CIs. '
        'The baselines are fully described in Section~\\ref{sec:synthetic-planted}. '
        'Spectral Clustering uses the principal non-trivial eigenvector; '
        'Densest Subgraph is Charikar\'s peeling; '
        'k-core reports the densest component; '
        'QuasiClique optimizes edge density.}\n',
        '\\end{figure}\n',
    ])

    with open(out_path, 'w') as w:
        w.writelines(parts)
    print(f"Wrote {out_path}")


def main() -> None:
    """Command-line entry point: read the CSV, aggregate, write the figure."""
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument('csv', help='Path to Table1 CSV (per-setting averages)')
    ap.add_argument('--out', default='appendix_f1_vs_pout.tikz',
                    help='Output TikZ file to include')
    args = ap.parse_args()

    rows = load_rows(args.csv)
    summary = aggregate_f1_vs_pout(rows)
    emit_tikz(summary, args.out)


if __name__ == '__main__':
    main()