# File size: 5,617 Bytes
# bf620c6
import csv
from collections import defaultdict
from statistics import mean, stdev
import math
def rename_method(label: str) -> str:
    """Translate an internal method key into the legend label used in the paper.

    Keys that have no dedicated legend label are returned unchanged.
    """
    legend_labels = {
        'Spectral': 'Spectral Clustering',
        'Densest': 'Densest Subgraph',
        'QuasiClique': 'QuasiClique',
        'k-core': 'k-core',
        'L-RMC': 'L-RMC (ours)',
    }
    if label in legend_labels:
        return legend_labels[label]
    # Unknown key: fall back to showing it verbatim.
    return label
def load_rows(path):
    """Read a CSV file and return its data rows as a list of dicts.

    Args:
        path: Path of the CSV file to read. The first line is used as the
            header by ``csv.DictReader``.

    Returns:
        A list with one dict per data row, keyed by the header's column
        names.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded inside quoted fields come back unmodified instead of being
    # rewritten by universal-newline translation.
    with open(path, 'r', newline='') as f:
        return list(csv.DictReader(f))
def aggregate_f1_vs_pout(rows):
    """Summarize F1 per (method, p_out), pooling all other settings.

    Rows are grouped by (Method, p_out) across k and p_in, where p_out is
    read from the ``'ExternalDensity'`` column.

    Args:
        rows: Iterable of mappings providing the keys ``'Method'``,
            ``'ExternalDensity'`` and ``'F1'``.

    Returns:
        A ``defaultdict`` mapping method -> p_out -> ``(mean, ci95, n)``.
        ``ci95`` is the half-width ``1.96 * s / sqrt(n)`` with ``s`` the
        sample standard deviation (``0.0`` when the group has one value),
        and ``n`` is the number of F1 values in the group.

    Rows that lack one of the three columns, or whose p_out / F1 value is
    missing, non-numeric, or not finite, are skipped.
    """
    samples = defaultdict(list)
    for row in rows:
        try:
            method = row['Method']
            pout = float(row['ExternalDensity'])
            f1 = float(row['F1'])
        except (KeyError, TypeError, ValueError):
            # TypeError covers None values, which csv.DictReader produces
            # for rows shorter than the header.
            continue
        # nan/inf parse as floats but would poison the mean and yield
        # p_out keys that cannot be sorted or plotted.
        if not (math.isfinite(pout) and math.isfinite(f1)):
            continue
        samples[(method, pout)].append(f1)

    summary = defaultdict(dict)  # method -> pout -> (mean, ci95, n)
    for (method, pout), vals in samples.items():
        n = len(vals)  # always >= 1: groups are only created on append
        s = stdev(vals) if n > 1 else 0.0
        ci95 = 1.96 * s / math.sqrt(n)
        summary[method][pout] = (mean(vals), ci95, n)
    return summary
def emit_tikz(summary, out_path):
    """Write a pgfplots figure of mean F1 versus p_out, one curve per method.

    Args:
        summary: Mapping method -> p_out -> ``(mean, ci95, n)``. Only the
            mean and the CI half-width are used here.
        out_path: Path of the TikZ/LaTeX file to create or overwrite.

    Only methods from a fixed, ordered list are drawn; any other key in
    ``summary`` is ignored. Each drawn method gets a mean line, a legend
    entry, and a shaded band between ``mean - ci95`` and ``mean + ci95``.
    The axis limits are hard-coded in the emitted text. A confirmation line
    is printed once the file has been written.

    NOTE(review): the emitted ``name path`` / ``fill between`` syntax
    presumably needs pgfplots' fillbetween library loaded by the including
    document -- confirm.
    """

    def coordinate_line(points):
        # Render one ' coordinates { (x,y) ...};' line for an \addplot.
        pairs = ''.join(f" ({x:.2f},{y:.3f})" for x, y in points)
        return " coordinates {" + pairs + "};\n"

    # Every p_out value seen for any method, in ascending order.
    x_values = sorted({x for per_method in summary.values() for x in per_method})

    # Fixed plotting order; methods absent from the summary are dropped.
    preferred_order = ('L-RMC', 'QuasiClique', 'Spectral', 'Densest', 'k-core')
    present = [m for m in preferred_order if m in summary]

    # One color per series, reused cyclically.
    palette = (
        'blue!80!black',
        'orange!80!black',
        'green!60!black',
        'red!80!black',
        'purple!70!black',
    )

    # Characters that are stripped from a method key to build its path name.
    strip_table = str.maketrans('', '', '- ')

    caption = (
        ' \\caption{F1 vs $p_{\\text{out}}$ (averaged over $k$ and $p_{\\text{in}}$). '
        'Shaded bands show 95\\% CIs. '
        'The baselines are fully described in Section~\\ref{sec:synthetic-planted}. '
        'Spectral Clustering uses the principal non-trivial eigenvector; '
        'Densest Subgraph is Charikar\'s peeling; '
        'k-core reports the densest component; '
        'QuasiClique optimizes edge density.}\n'
    )

    with open(out_path, 'w') as out:
        out.writelines([
            '% Auto-generated by make_appendix_f1_vs_pout_tikz.py\n',
            '\\begin{figure}[h]\n',
            ' \\centering\n',
            ' \\begin{tikzpicture}\n',
            ' \\begin{axis}[\n',
            ' width=0.9\\linewidth, height=5cm,\n',
            ' xlabel={$p_{\\text{out}}$}, ylabel={F1},\n',
            ' xmin=0.22, xmax=0.48, ymin=0, ymax=1.02,\n',
            ' grid=both,\n',
            ' legend to name=f1legend,\n',
            ' legend columns=3,\n',
            ' legend cell align=left,\n',
            ' legend image post style={xscale=1.2},\n',
            ' legend style={draw=none, fill=none, font=\\scriptsize, /tikz/every even column/.append style={column sep=0.75em}}\n',
            ' ]\n\n',
        ])

        for position, method in enumerate(present):
            legend = rename_method(method)
            shade = palette[position % len(palette)]
            stats = summary[method]

            # Mean curve plus the two edges of the CI band.
            means, highs, lows = [], [], []
            for x in x_values:
                if x not in stats:
                    continue
                center, half_width, _ = stats[x]
                means.append((x, center))
                highs.append((x, center + half_width))
                lows.append((x, center - half_width))

            path_id = method.translate(strip_table)
            # Human-readable marker in the generated file.
            note = 'L-RMC (ours)' if method == 'L-RMC' else legend

            out.writelines([
                f" % --- {note}\n",
                # Mean line and its legend entry.
                f" \\addplot+[mark=o, thick, draw={shade}]\n",
                coordinate_line(means),
                f" \\addlegendentry{{{legend}}}\n",
                # Invisible upper/lower paths that delimit the band.
                f" \\addplot[name path=upper{path_id}, draw=none, forget plot]\n",
                coordinate_line(highs),
                f" \\addplot[name path=lower{path_id}, draw=none, forget plot]\n",
                coordinate_line(lows),
                # Shaded region between the two named paths.
                f" \\addplot[fill={shade}, fill opacity=0.30, draw=none, forget plot]\n",
                f" fill between [of=upper{path_id} and lower{path_id}];\n\n",
            ])

        out.writelines([
            ' \\end{axis}\n',
            ' \\end{tikzpicture}\n\n',
            ' % Place the short legend collected above\n',
            ' \\pgfplotslegendfromname{f1legend}\n\n',
            caption,
            '\\end{figure}\n',
        ])

    print(f"Wrote {out_path}")
if __name__ == '__main__':
    # Script entry point: read the CSV, aggregate F1 per (method, p_out),
    # and write the TikZ figure.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', help='Path to Table1 CSV (per-setting averages)')
    parser.add_argument(
        '--out',
        default='appendix_f1_vs_pout.tikz',
        help='Output TikZ file to include',
    )
    cli = parser.parse_args()

    emit_tikz(aggregate_f1_vs_pout(load_rows(cli.csv)), cli.out)
|