File size: 5,617 Bytes
bf620c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import csv
from collections import defaultdict
from statistics import mean, stdev
import math


def rename_method(label: str) -> str:
    """Translate an internal method key into the legend label used in the paper.

    Keys that have no dedicated legend label are returned unchanged.
    """
    legend_labels = dict([
        ('Spectral', 'Spectral Clustering'),
        ('Densest', 'Densest Subgraph'),
        ('QuasiClique', 'QuasiClique'),
        ('k-core', 'k-core'),
        ('L-RMC', 'L-RMC (ours)'),
    ])
    if label in legend_labels:
        return legend_labels[label]
    return label


def load_rows(path):
    """Read a CSV file with a header row and return its records.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one dict per data row, keyed by the header names. All
        values are strings exactly as they appear in the file.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being rewritten by
    # universal-newline translation.  'utf-8-sig' reads plain UTF-8 and also
    # drops a leading byte-order mark (as written by spreadsheet exports),
    # which would otherwise become part of the first header name.
    with open(path, 'r', newline='', encoding='utf-8-sig') as f:
        return list(csv.DictReader(f))


def aggregate_f1_vs_pout(rows):
    """Summarise F1 per (method, p_out), pooling every other setting.

    Args:
        rows: Iterable of mappings (e.g. ``csv.DictReader`` rows) providing
            the keys ``'Method'``, ``'ExternalDensity'`` (p_out) and ``'F1'``.

    Returns:
        A ``defaultdict`` mapping ``method -> p_out -> (mean, ci95, n)``,
        where ``ci95`` is the half-width ``1.96 * stdev / sqrt(n)`` of a
        normal-approximation 95% confidence interval and ``n`` is the number
        of pooled samples. A group with a single sample gets ``ci95 == 0.0``.

    Rows that lack one of the three keys, or whose numeric fields are empty,
    ``None``, unparsable or non-finite, are skipped rather than aborting the
    whole aggregation.
    """
    # Group by (Method, p_out) across k and p_in
    samples = defaultdict(list)
    for row in rows:
        try:
            method = row['Method']
            pout = float(row['ExternalDensity'])
            f1 = float(row['F1'])
        except (KeyError, TypeError, ValueError):
            # TypeError: csv.DictReader pads short rows with None, and
            # float(None) does not raise ValueError.
            continue
        if method is None:
            continue
        # float() accepts 'nan'/'inf'; one such value would turn the group
        # mean into a non-number that cannot be plotted.
        if not (math.isfinite(pout) and math.isfinite(f1)):
            continue
        samples[(method, pout)].append(f1)

    summary = defaultdict(dict)  # method -> pout -> (mean, ci95, n)
    for (method, pout), vals in samples.items():
        n = len(vals)  # always >= 1: a group only exists once it has a sample
        mu = mean(vals)
        # The sample standard deviation is undefined for a single observation.
        s = stdev(vals) if n > 1 else 0.0
        ci95 = 1.96 * s / math.sqrt(n)
        summary[method][pout] = (mu, ci95, n)
    return summary


def _format_coordinates(points):
    """Render (x, y) pairs as the body of a pgfplots ``coordinates {...}`` list."""
    return ''.join(f" ({x:.2f},{y:.3f})" for x, y in points)


def emit_tikz(summary, out_path):
    """Write a LaTeX figure plotting mean F1 against p_out for each method.

    For every known method present in ``summary`` the figure contains a mean
    line with a legend entry plus a shaded band between ``mean + ci95`` and
    ``mean - ci95``. The legend is collected under the name ``f1legend`` and
    placed below the plot.

    Args:
        summary: Mapping ``method -> p_out -> (mean, ci95, n)``. Only the
            methods 'L-RMC', 'QuasiClique', 'Spectral', 'Densest' and
            'k-core' are plotted, in that order; any other key is ignored.
        out_path: Path of the TikZ/LaTeX file to create or overwrite.

    The axis range (x in [0.22, 0.48], y in [0, 1.02]) is fixed in the
    emitted code. NOTE(review): the output uses ``name path`` /
    ``fill between`` and ``\\text``, which presumably require the pgfplots
    ``fillbetween`` library and ``amsmath`` in the including document.
    """
    # Each method owns a fixed colour, so a series looks the same in every
    # generated figure even when some methods are missing from the input.
    series_styles = [
        ('L-RMC', 'blue!80!black'),
        ('QuasiClique', 'orange!80!black'),
        ('Spectral', 'green!60!black'),
        ('Densest', 'red!80!black'),
        ('k-core', 'purple!70!black'),
    ]

    with open(out_path, 'w', encoding='utf-8') as w:
        w.write('% Auto-generated by make_appendix_f1_vs_pout_tikz.py\n')
        w.write('\\begin{figure}[h]\n')
        w.write('  \\centering\n')
        w.write('  \\begin{tikzpicture}\n')
        w.write('    \\begin{axis}[\n')
        w.write('      width=0.9\\linewidth, height=5cm,\n')
        w.write('      xlabel={$p_{\\text{out}}$}, ylabel={F1},\n')
        w.write('      xmin=0.22, xmax=0.48, ymin=0, ymax=1.02,\n')
        w.write('      grid=both,\n')
        w.write('      legend to name=f1legend,\n')
        w.write('      legend columns=3,\n')
        w.write('      legend cell align=left,\n')
        w.write('      legend image post style={xscale=1.2},\n')
        w.write('      legend style={draw=none, fill=none, font=\\scriptsize, /tikz/every even column/.append style={column sep=0.75em}}\n')
        w.write('    ]\n\n')

        for method, color in series_styles:
            if method not in summary:
                continue
            label = rename_method(method)
            per_pout = summary[method]

            # Means and 95% CI bands, ordered by increasing p_out
            coords = []
            upper = []
            lower = []
            for p in sorted(per_pout):
                mu, ci95, _ = per_pout[p]
                coords.append((p, mu))
                upper.append((p, mu + ci95))
                lower.append((p, mu - ci95))

            # Path names are built without '-' or spaces.
            name = method.replace('-', '').replace(' ', '')

            # Series comment for readability of the generated file
            w.write(f"    % --- {label}\n")

            # Mean line
            w.write(f"    \\addplot+[mark=o, thick, draw={color}]\n")
            w.write("      coordinates {" + _format_coordinates(coords) + "};\n")
            w.write(f"    \\addlegendentry{{{label}}}\n")

            # Invisible upper and lower paths that delimit the shaded band;
            # 'forget plot' keeps them out of the legend.
            w.write(f"    \\addplot[name path=upper{name}, draw=none, forget plot]\n")
            w.write("      coordinates {" + _format_coordinates(upper) + "};\n")
            w.write(f"    \\addplot[name path=lower{name}, draw=none, forget plot]\n")
            w.write("      coordinates {" + _format_coordinates(lower) + "};\n")

            # Fill between the two paths in the series colour
            w.write(f"    \\addplot[fill={color}, fill opacity=0.30, draw=none, forget plot]\n")
            w.write(f"      fill between [of=upper{name} and lower{name}];\n\n")

        w.write('    \\end{axis}\n')
        w.write('  \\end{tikzpicture}\n\n')
        w.write('  % Place the short legend collected above\n')
        w.write('  \\pgfplotslegendfromname{f1legend}\n\n')
        w.write('  \\caption{F1 vs $p_{\\text{out}}$ (averaged over $k$ and $p_{\\text{in}}$). '
                'Shaded bands show 95\\% CIs. '
                'The baselines are fully described in Section~\\ref{sec:synthetic-planted}. '
                'Spectral Clustering uses the principal non-trivial eigenvector; '
                'Densest Subgraph is Charikar\'s peeling; '
                'k-core reports the densest component; '
                'QuasiClique optimizes edge density.}\n')
        w.write('\\end{figure}\n')
    print(f"Wrote {out_path}")


if __name__ == '__main__':
    # Command-line entry point: load the CSV, aggregate F1 per (method, p_out),
    # then write the TikZ figure to the requested output file.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'csv',
        help='Path to Table1 CSV (per-setting averages)',
    )
    parser.add_argument(
        '--out',
        default='appendix_f1_vs_pout.tikz',
        help='Output TikZ file to include',
    )
    cli = parser.parse_args()

    emit_tikz(aggregate_f1_vs_pout(load_rows(cli.csv)), cli.out)