clique / src /synthetic /Table1_utils /make_appendix_f1_vs_pout_tikz.py

qingy2024

Upload folder using huggingface_hub

bf620c6 verified 6 months ago

5.62 kB

	import csv
	from collections import defaultdict
	from statistics import mean, stdev
	import math


	def rename_method(label: str) -> str:
	    """Translate an internal method key into the legend label used in the paper.

	    Keys that have no dedicated legend label are returned unchanged.
	    """
	    legend_labels = {
	        'Spectral': 'Spectral Clustering',
	        'Densest': 'Densest Subgraph',
	        'QuasiClique': 'QuasiClique',
	        'k-core': 'k-core',
	        'L-RMC': 'L-RMC (ours)',
	    }
	    if label in legend_labels:
	        return legend_labels[label]
	    # Unknown keys pass through untouched.
	    return label


	def load_rows(path):
	    """Read a CSV file that has a header row and return its records.

	    Args:
	        path: Location of the CSV file to read.

	    Returns:
	        A list with one dict per data row, keyed by the header names.
	    """
	    # The csv module asks for newline='' so that it, rather than the text
	    # layer, interprets line endings; without it, newlines embedded in
	    # quoted fields are silently rewritten.
	    with open(path, 'r', newline='') as f:
	        return list(csv.DictReader(f))


	def aggregate_f1_vs_pout(rows):
	    """Summarize F1 per (method, p_out), pooling all other settings together.

	    Args:
	        rows: Iterable of mappings providing 'Method', 'ExternalDensity'
	            and 'F1' entries; the latter two must be parseable by float().

	    Returns:
	        A defaultdict mapping method -> {p_out: (mean, ci95, n)}, where
	        ci95 is the normal-approximation half width 1.96 * s / sqrt(n)
	        and is 0.0 when only one sample is available.

	    Rows that lack a required column, hold a missing (None) value, or whose
	    numeric fields are not finite numbers are skipped.
	    """
	    # Group by (Method, p_out) across k and p_in
	    samples = defaultdict(list)
	    for row in rows:
	        try:
	            method = row['Method']
	            pout = float(row['ExternalDensity'])
	            f1 = float(row['F1'])
	        except (KeyError, TypeError, ValueError):
	            # TypeError covers None values, which csv.DictReader produces
	            # for rows that are shorter than the header.
	            continue
	        if not (math.isfinite(pout) and math.isfinite(f1)):
	            # NaN/inf would poison the mean, and NaN keys never compare
	            # equal, so such rows cannot be grouped or sorted meaningfully.
	            continue
	        samples[(method, pout)].append(f1)

	    summary = defaultdict(dict)  # method -> pout -> (mean, ci95, n)
	    for (method, pout), vals in samples.items():
	        n = len(vals)  # always >= 1: a group exists only once a value was appended
	        # The sample standard deviation is undefined for a single value.
	        s = stdev(vals) if n > 1 else 0.0
	        ci95 = 1.96 * s / math.sqrt(n)
	        summary[method][pout] = (mean(vals), ci95, n)
	    return summary


	def emit_tikz(summary, out_path):
	    """Write a pgfplots figure of mean F1 versus p_out with shaded CI bands.

	    Args:
	        summary: Mapping method -> {p_out: (mean, ci95, n)}.
	        out_path: Path of the TikZ/LaTeX file to (over)write.

	    Only the methods listed in the fixed plotting order below are drawn, and
	    only when they appear in ``summary``. Each series produces a mean line,
	    two invisible named paths (mean +/- ci95) and a "fill between" band.
	    A confirmation line is printed once the file has been written.
	    """
	    plot_order = ('L-RMC', 'QuasiClique', 'Spectral', 'Densest', 'k-core')
	    # One colour per series, reused cyclically if there are more series.
	    palette = (
	        'blue!80!black',
	        'orange!80!black',
	        'green!60!black',
	        'red!80!black',
	        'purple!70!black',
	    )

	    # Union of every p_out seen for any method, in ascending order.
	    seen_pouts = set()
	    for per_pout in summary.values():
	        seen_pouts.update(per_pout)
	    x_values = sorted(seen_pouts)
	    drawn = [m for m in plot_order if m in summary]

	    def coordinate_line(points):
	        # Render "(x,y)" pairs as a single pgfplots coordinates line.
	        body = "".join(f" ({x:.2f},{y:.3f})" for x, y in points)
	        return " coordinates {" + body + "};\n"

	    header = (
	        '% Auto-generated by make_appendix_f1_vs_pout_tikz.py\n',
	        '\\begin{figure}[h]\n',
	        ' \\centering\n',
	        ' \\begin{tikzpicture}\n',
	        ' \\begin{axis}[\n',
	        ' width=0.9\\linewidth, height=5cm,\n',
	        ' xlabel={$p_{\\text{out}}$}, ylabel={F1},\n',
	        ' xmin=0.22, xmax=0.48, ymin=0, ymax=1.02,\n',
	        ' grid=both,\n',
	        ' legend to name=f1legend,\n',
	        ' legend columns=3,\n',
	        ' legend cell align=left,\n',
	        ' legend image post style={xscale=1.2},\n',
	        ' legend style={draw=none, fill=none, font=\\scriptsize, /tikz/every even column/.append style={column sep=0.75em}}\n',
	        ' ]\n\n',
	    )
	    caption = (
	        ' \\caption{F1 vs $p_{\\text{out}}$ (averaged over $k$ and $p_{\\text{in}}$). '
	        'Shaded bands show 95\\% CIs. '
	        'The baselines are fully described in Section~\\ref{sec:synthetic-planted}. '
	        'Spectral Clustering uses the principal non-trivial eigenvector; '
	        'Densest Subgraph is Charikar\'s peeling; '
	        'k-core reports the densest component; '
	        'QuasiClique optimizes edge density.}\n'
	    )
	    footer = (
	        ' \\end{axis}\n',
	        ' \\end{tikzpicture}\n\n',
	        ' % Place the short legend collected above\n',
	        ' \\pgfplotslegendfromname{f1legend}\n\n',
	        caption,
	        '\\end{figure}\n',
	    )

	    with open(out_path, 'w') as w:
	        w.writelines(header)

	        for position, method in enumerate(drawn):
	            label = rename_method(method)
	            color = palette[position % len(palette)]
	            series = summary[method]

	            # Mean curve plus the upper/lower edges of the 95% CI band.
	            mean_pts, upper_pts, lower_pts = [], [], []
	            for p in x_values:
	                if p not in series:
	                    continue
	                mu, ci95, _ = series[p]
	                mean_pts.append((p, mu))
	                upper_pts.append((p, mu + ci95))
	                lower_pts.append((p, mu - ci95))

	            # Path names are built from the method key with '-' and ' ' removed.
	            path_id = method.replace('-', '').replace(' ', '')

	            # Series comment for readability of the generated file.
	            comment = 'L-RMC (ours)' if method == 'L-RMC' else label
	            w.write(f" % --- {comment}\n")

	            # Mean line
	            w.write(f" \\addplot+[mark=o, thick, draw={color}]\n")
	            w.write(coordinate_line(mean_pts))
	            w.write(f" \\addlegendentry{{{label}}}\n")

	            # Invisible upper and lower paths that delimit the shaded band.
	            w.write(f" \\addplot[name path=upper{path_id}, draw=none, forget plot]\n")
	            w.write(coordinate_line(upper_pts))
	            w.write(f" \\addplot[name path=lower{path_id}, draw=none, forget plot]\n")
	            w.write(coordinate_line(lower_pts))

	            # Shade the region between the two named paths.
	            w.write(f" \\addplot[fill={color}, fill opacity=0.30, draw=none, forget plot]\n")
	            w.write(f" fill between [of=upper{path_id} and lower{path_id}];\n\n")

	        w.writelines(footer)
	    print(f"Wrote {out_path}")


	if __name__ == '__main__':
	    # Command-line entry point: load the Table1 CSV, aggregate F1 by p_out,
	    # and write the resulting TikZ figure to the requested output file.
	    import argparse

	    parser = argparse.ArgumentParser()
	    parser.add_argument('csv', help='Path to Table1 CSV (per-setting averages)')
	    parser.add_argument('--out', default='appendix_f1_vs_pout.tikz', help='Output TikZ file to include')
	    cli = parser.parse_args()

	    emit_tikz(aggregate_f1_vs_pout(load_rows(cli.csv)), cli.out)