|
|
import csv |
|
|
from collections import defaultdict |
|
|
from statistics import mean, stdev |
|
|
import math |
|
|
import matplotlib |
|
|
matplotlib.use("Agg") |
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
|
def rename_method(label: str) -> str:
    """Return the display name used in the plot legend for a method label.

    Args:
        label: Method identifier to translate.

    Returns:
        The legend name for the labels listed below; any other label is
        returned unchanged.
    """
    mapping = {
        'Spectral': 'Spectral Clustering',
        'Densest': 'Densest Subgraph',
        'QuasiClique': 'QuasiClique',
        'k-core': 'k-core',
        'L-RMC': 'L-RMC (ours)',
    }
    # Unknown labels fall through untouched so new methods still get a legend entry.
    return mapping.get(label, label)
|
|
|
|
|
|
|
|
def load_rows(path):
    """Read a CSV file and return its data rows as dictionaries.

    Args:
        path: Path of the CSV file; its first line is used as the header.

    Returns:
        A list with one dict per data row, keyed by the header names.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are not rewritten by the text layer.
    with open(path, 'r', newline='') as f:
        return list(csv.DictReader(f))
|
|
|
|
|
|
|
|
def aggregate_f1_vs_pout(rows):
    """Summarise F1 per method and external density.

    Rows are grouped by ``(Method, ExternalDensity)``. Each group is reduced
    to its mean F1, the half-width of a normal-approximation 95% confidence
    interval (``1.96 * stdev / sqrt(n)``, or 0.0 for a single sample), and
    its sample count.

    A row is skipped when any of the ``Method``, ``ExternalDensity`` or ``F1``
    keys is missing, or when a numeric field is ``None`` or does not parse as
    a float.

    Args:
        rows: Iterable of mappings with ``Method``, ``ExternalDensity`` and
            ``F1`` entries (e.g. rows from ``csv.DictReader``).

    Returns:
        A ``defaultdict`` mapping each method to a dict of
        ``{pout: (mean, ci95, n)}``.
    """
    group = defaultdict(list)
    for row in rows:
        try:
            method = row['Method']
            pout = float(row['ExternalDensity'])
            f1 = float(row['F1'])
        except (KeyError, TypeError, ValueError):
            # TypeError covers float(None): csv.DictReader fills the missing
            # fields of a short row with None.
            continue
        group[(method, pout)].append(f1)

    summary = defaultdict(dict)
    for (method, pout), vals in group.items():
        # Groups only exist once a value was appended, so n >= 1 here.
        n = len(vals)
        s = stdev(vals) if n > 1 else 0.0  # stdev needs at least two samples
        ci95 = 1.96 * s / math.sqrt(n)
        summary[method][pout] = (mean(vals), ci95, n)
    return summary
|
|
|
|
|
|
|
|
def plot(summary, out_path):
    """Plot mean F1 against external density, one line per method, and save it.

    Each method gets a line through its per-density means and a translucent
    band of ``mean +/- ci95`` around it. Densities missing for a method are
    filled with a NaN mean so every method shares the same x values.

    Args:
        summary: Mapping ``method -> {pout: (mean, ci95, n)}`` as produced by
            ``aggregate_f1_vs_pout``.
        out_path: Destination passed to ``savefig``.
    """
    pouts = sorted({p for per_method in summary.values() for p in per_method})
    methods = sorted(summary)

    fig, ax = plt.subplots(figsize=(6.0, 3.4), dpi=200)
    try:
        for method in methods:
            means = []
            ci = []
            for p in pouts:
                mu, ci95, _ = summary[method].get(p, (float('nan'), 0.0, 0))
                means.append(mu)
                ci.append(ci95)

            (line,) = ax.plot(
                pouts, means,
                marker='o', linewidth=2, markersize=4,
                label=rename_method(method),
            )

            lower = [m - c for m, c in zip(means, ci)]
            upper = [m + c for m, c in zip(means, ci)]
            # Take the band colour from the line itself instead of relying on
            # the implicit fill colour cycle staying in step with the lines.
            ax.fill_between(pouts, lower, upper, alpha=0.15,
                            facecolor=line.get_color())

        ax.set_xlabel(r"$p_{\text{out}}$")
        ax.set_ylabel("F1")
        ax.set_title("F1 vs $p_{\\text{out}}$ (avg over $k,p_{\\text{in}}$)")
        ax.grid(True, alpha=0.3)
        if methods:
            # Skip the legend when nothing was drawn; there is nothing to label.
            ax.legend(fontsize=7, frameon=False)
        fig.tight_layout()
        fig.savefig(out_path, bbox_inches='tight')
    finally:
        # pyplot keeps figures alive until closed; release this one so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    print(f"Saved {out_path}")
|
|
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point: load the CSV, aggregate it, and save the plot.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument('csv', help='Path to Table1 CSV (per-setting averages)')
    ap.add_argument('--out', default='appendix_f1_vs_pout.png', help='Output PNG path')
    args = ap.parse_args(argv)

    rows = load_rows(args.csv)
    summary = aggregate_f1_vs_pout(rows)
    if not summary:
        # Unusable rows are skipped during aggregation, so an empty summary
        # means no row was usable; fail loudly instead of saving an empty
        # figure and reporting success.
        ap.error(f"no usable Method/ExternalDensity/F1 rows found in {args.csv}")
    plot(summary, args.out)


if __name__ == '__main__':
    main()
|
|
|