|
|
package synthetic; |
|
|
|
|
|
import java.io.*; |
|
|
import java.util.*; |
|
|
|
|
|
public class Table1Synthetic { |
|
|
private static final int MIN_SIZE_FOR_RANKING_METHODS = 10; |
|
|
|
|
|
static class SyntheticGraph { |
|
|
List<Integer>[] adj; |
|
|
Set<Integer> plantedCluster; |
|
|
int n, m; |
|
|
|
|
|
@SuppressWarnings("unchecked") |
|
|
SyntheticGraph(int n) { |
|
|
this.n = n; |
|
|
this.adj = new ArrayList[n + 1]; |
|
|
for (int i = 1; i <= n; i++) { |
|
|
adj[i] = new ArrayList<>(); |
|
|
} |
|
|
this.plantedCluster = new HashSet<>(); |
|
|
this.m = 0; |
|
|
} |
|
|
|
|
|
void addEdge(int u, int v) { |
|
|
if (u != v && !adj[u].contains(v)) { |
|
|
adj[u].add(v); |
|
|
adj[v].add(u); |
|
|
m++; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
static class AggregatedResult { |
|
|
double precision, recall, f1; |
|
|
double minDeg, avgDeg, maxDeg, density; |
|
|
long rmcScore; |
|
|
long runtimeMs; |
|
|
int count; |
|
|
|
|
|
AggregatedResult() { |
|
|
precision = recall = f1 = 0; |
|
|
minDeg = avgDeg = maxDeg = density = 0; |
|
|
rmcScore = 0; |
|
|
runtimeMs = 0; |
|
|
count = 0; |
|
|
} |
|
|
|
|
|
void addResult(EvaluationResult r) { |
|
|
precision += r.precision; |
|
|
recall += r.recall; |
|
|
f1 += r.f1; |
|
|
minDeg += r.minDeg; |
|
|
avgDeg += r.avgDeg; |
|
|
maxDeg += r.maxDeg; |
|
|
density += r.density; |
|
|
rmcScore += r.found.size() * r.minDeg; |
|
|
runtimeMs += r.runtimeMs; |
|
|
count++; |
|
|
} |
|
|
|
|
|
void average() { |
|
|
if (count > 0) { |
|
|
precision /= count; |
|
|
recall /= count; |
|
|
f1 /= count; |
|
|
minDeg /= count; |
|
|
avgDeg /= count; |
|
|
maxDeg /= count; |
|
|
density /= count; |
|
|
rmcScore /= count; |
|
|
runtimeMs /= count; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
static class EvaluationResult { |
|
|
Set<Integer> found; |
|
|
double precision, recall, f1; |
|
|
double minDeg, avgDeg, maxDeg, density; |
|
|
long runtimeMs; |
|
|
|
|
|
EvaluationResult(Set<Integer> found, Set<Integer> planted, long runtime) { |
|
|
this.found = found; |
|
|
this.runtimeMs = runtime; |
|
|
|
|
|
if (found.isEmpty()) { |
|
|
precision = recall = f1 = 0.0; |
|
|
minDeg = avgDeg = maxDeg = density = 0.0; |
|
|
} else { |
|
|
Set<Integer> intersection = new HashSet<>(found); |
|
|
intersection.retainAll(planted); |
|
|
|
|
|
precision = (double) intersection.size() / found.size(); |
|
|
recall = planted.isEmpty() ? 0.0 : (double) intersection.size() / planted.size(); |
|
|
f1 = (precision + recall == 0) ? 0.0 : 2 * precision * recall / (precision + recall); |
|
|
} |
|
|
} |
|
|
|
|
|
void computeStats(List<Integer>[] adj) { |
|
|
if (found.isEmpty()) { |
|
|
minDeg = avgDeg = maxDeg = density = 0.0; |
|
|
return; |
|
|
} |
|
|
|
|
|
int totalDeg = 0; |
|
|
int minD = Integer.MAX_VALUE; |
|
|
int maxD = Integer.MIN_VALUE; |
|
|
int edges = 0; |
|
|
|
|
|
for (int u : found) { |
|
|
int deg = 0; |
|
|
for (int v : adj[u]) { |
|
|
if (found.contains(v)) { |
|
|
deg++; |
|
|
if (u < v) edges++; |
|
|
} |
|
|
} |
|
|
totalDeg += deg; |
|
|
minD = Math.min(minD, deg); |
|
|
maxD = Math.max(maxD, deg); |
|
|
} |
|
|
|
|
|
minDeg = minD; |
|
|
maxDeg = maxD; |
|
|
avgDeg = (double) totalDeg / found.size(); |
|
|
int maxEdges = found.size() * (found.size() - 1) / 2; |
|
|
density = maxEdges == 0 ? 0.0 : (double) edges / maxEdges; |
|
|
} |
|
|
} |
|
|
|
|
|
static EvaluationResult runSingleLRMC(SyntheticGraph g, double eps) { |
|
|
long start = System.nanoTime(); |
|
|
|
|
|
clique2_mk_benchmark_accuracy.Result result = |
|
|
clique2_mk_benchmark_accuracy.runLaplacianRMC(g.adj, eps); |
|
|
|
|
|
long end = System.nanoTime(); |
|
|
|
|
|
Set<Integer> found = (result.bestComponent != null) ? result.bestComponent : new HashSet<>(); |
|
|
EvaluationResult evalResult = new EvaluationResult(found, g.plantedCluster, (end - start) / 1_000_000); |
|
|
evalResult.computeStats(g.adj); |
|
|
return evalResult; |
|
|
} |
|
|
|
|
|
static EvaluationResult runSpectralBaseline(SyntheticGraph g) { |
|
|
long start = System.nanoTime(); |
|
|
|
|
|
double[] u = topEigenvectorAdjNormalized2nd(g.adj); |
|
|
Integer[] ordDesc = new Integer[g.n]; |
|
|
Integer[] ordAsc = new Integer[g.n]; |
|
|
for (int i = 0; i < g.n; i++) { ordDesc[i] = i + 1; ordAsc[i] = i + 1; } |
|
|
Arrays.sort(ordDesc, (a, b) -> Double.compare(u[b], u[a])); |
|
|
Arrays.sort(ordAsc, (a, b) -> Double.compare(u[a], u[b])); |
|
|
|
|
|
Set<Integer> candDesc = sweepDensityWithMin(ordDesc, g.adj, MIN_SIZE_FOR_RANKING_METHODS); |
|
|
Set<Integer> candAsc = sweepDensityWithMin(ordAsc, g.adj, MIN_SIZE_FOR_RANKING_METHODS); |
|
|
|
|
|
Set<Integer> cand = betterByDensity(candDesc, candAsc, g.adj); |
|
|
|
|
|
long end = System.nanoTime(); |
|
|
EvaluationResult ev = new EvaluationResult(cand, g.plantedCluster, (end - start) / 1_000_000); |
|
|
ev.computeStats(g.adj); |
|
|
return ev; |
|
|
} |
|
|
|
|
|
private static Set<Integer> betterByDensity(Set<Integer> a, Set<Integer> b, List<Integer>[] adj) { |
|
|
double da = density(a, adj), db = density(b, adj); |
|
|
return (da >= db) ? a : b; |
|
|
} |
|
|
private static double density(Set<Integer> S, List<Integer>[] adj) { |
|
|
if (S == null || S.size() < 2) return 0.0; |
|
|
long e = 0L; |
|
|
for (int u : S) for (int v : adj[u]) if (u < v && S.contains(v)) e++; |
|
|
return (2.0 * e) / (S.size() * (S.size() - 1)); |
|
|
} |
|
|
|
|
|
private static double dot(double[] a, double[] b) { |
|
|
int n = a.length - 1; |
|
|
double s = 0.0; |
|
|
for (int i = 1; i <= n; i++) s += a[i] * b[i]; |
|
|
return s; |
|
|
} |
|
|
|
|
|
private static double norm(double[] a) { |
|
|
return Math.sqrt(dot(a, a)); |
|
|
} |
|
|
|
|
|
private static void normalize(double[] a) { |
|
|
double nrm = norm(a); |
|
|
if (nrm == 0) return; |
|
|
int n = a.length - 1; |
|
|
for (int i = 1; i <= n; i++) a[i] /= nrm; |
|
|
} |
|
|
|
|
|
private static double dist(double[] x, double[] y) { |
|
|
int n = x.length - 1; |
|
|
double s = 0.0; |
|
|
for (int i = 1; i <= n; i++) { |
|
|
double d = x[i] - y[i]; |
|
|
s += d * d; |
|
|
} |
|
|
return Math.sqrt(s); |
|
|
} |
|
|
|
|
|
|
|
|
private static Set<Integer> sweepDensityWithMin(Integer[] order, List<Integer>[] adj, int minSize) { |
|
|
int n = adj.length - 1; |
|
|
boolean[] in = new boolean[n + 1]; |
|
|
long internalEdges = 0L; |
|
|
|
|
|
int bestK = -1; |
|
|
double bestDensity = -1.0; |
|
|
|
|
|
for (int k = 1; k <= n; k++) { |
|
|
int u = order[k - 1]; |
|
|
long add = 0L; |
|
|
for (int v : adj[u]) if (in[v]) add++; |
|
|
internalEdges += add; |
|
|
in[u] = true; |
|
|
|
|
|
if (k >= minSize) { |
|
|
double denom = (double) k * (k - 1); |
|
|
double dens = (denom > 0) ? (2.0 * internalEdges) / denom : 0.0; |
|
|
if (dens > bestDensity) { bestDensity = dens; bestK = k; } |
|
|
} |
|
|
} |
|
|
|
|
|
if (bestK < 0) bestK = Math.min(n, minSize); |
|
|
|
|
|
Set<Integer> out = new LinkedHashSet<>(); |
|
|
for (int i = 0; i < bestK; i++) out.add(order[i]); |
|
|
return out; |
|
|
} |
|
|
|
|
|
|
|
|
private static double[] topEigenvectorAdjNormalized2nd(List<Integer>[] adj) { |
|
|
int n = adj.length - 1; |
|
|
int[] deg = new int[n + 1]; |
|
|
double[] sd = new double[n + 1]; |
|
|
for (int i = 1; i <= n; i++) { |
|
|
deg[i] = adj[i].size(); |
|
|
sd[i] = deg[i] > 0 ? Math.sqrt(deg[i]) : 0.0; |
|
|
} |
|
|
|
|
|
|
|
|
double[] w = new double[n + 1]; |
|
|
double wnorm2 = 0.0; |
|
|
for (int i = 1; i <= n; i++) { w[i] = sd[i]; wnorm2 += w[i] * w[i]; } |
|
|
double wnorm = wnorm2 > 0 ? Math.sqrt(wnorm2) : 1.0; |
|
|
for (int i = 1; i <= n; i++) w[i] /= wnorm; |
|
|
|
|
|
double[] u = new double[n + 1], v = new double[n + 1]; |
|
|
Random rnd = new Random(42); |
|
|
for (int i = 1; i <= n; i++) u[i] = rnd.nextDouble() - 0.5; |
|
|
|
|
|
|
|
|
double alpha = dot(u, w); |
|
|
for (int i = 1; i <= n; i++) u[i] -= alpha * w[i]; |
|
|
normalize(u); |
|
|
|
|
|
int maxIter = 200; |
|
|
double tol = 1e-6; |
|
|
for (int it = 0; it < maxIter; it++) { |
|
|
Arrays.fill(v, 0.0); |
|
|
|
|
|
|
|
|
for (int i = 1; i <= n; i++) { |
|
|
double zi = (sd[i] > 0) ? (u[i] / sd[i]) : 0.0; |
|
|
if (zi == 0.0) continue; |
|
|
for (int j : adj[i]) v[j] += zi; |
|
|
} |
|
|
for (int j = 1; j <= n; j++) v[j] = (sd[j] > 0) ? (v[j] / sd[j]) : 0.0; |
|
|
|
|
|
|
|
|
alpha = dot(v, w); |
|
|
for (int i = 1; i <= n; i++) v[i] -= alpha * w[i]; |
|
|
|
|
|
normalize(v); |
|
|
double diff = dist(u, v); |
|
|
System.arraycopy(v, 0, u, 0, n + 1); |
|
|
if (diff < tol) break; |
|
|
} |
|
|
return u; |
|
|
} |
|
|
|
|
|
static EvaluationResult runKCoreDegeneracyBaseline(SyntheticGraph g) { |
|
|
long start = System.nanoTime(); |
|
|
Set<Integer> best = new HashSet<>(); |
|
|
double bestDens = -1.0; |
|
|
|
|
|
for (int k = Math.min(200, g.n); k >= 2; k--) { |
|
|
Set<Integer> core = computeKCore(g, k); |
|
|
if (core.isEmpty()) continue; |
|
|
for (Set<Integer> comp : components(core, g.adj)) { |
|
|
int s = comp.size(); |
|
|
if (s < 2) continue; |
|
|
int edges = 0; |
|
|
for (int u : comp) for (int v : g.adj[u]) if (u < v && comp.contains(v)) edges++; |
|
|
double dens = (2.0 * edges) / (s * (s - 1)); |
|
|
if (dens > bestDens || (Math.abs(dens - bestDens) < 1e-12 && s > best.size())) { |
|
|
bestDens = dens; best = comp; |
|
|
} |
|
|
} |
|
|
} |
|
|
long end = System.nanoTime(); |
|
|
EvaluationResult ev = new EvaluationResult(best, g.plantedCluster, (end - start) / 1_000_000); |
|
|
ev.computeStats(g.adj); |
|
|
return ev; |
|
|
} |
|
|
|
|
|
static Set<Integer> computeKCore(SyntheticGraph g, int k) { |
|
|
int[] deg = new int[g.n + 1]; |
|
|
boolean[] removed = new boolean[g.n + 1]; |
|
|
Queue<Integer> queue = new LinkedList<>(); |
|
|
|
|
|
for (int i = 1; i <= g.n; i++) { |
|
|
deg[i] = g.adj[i].size(); |
|
|
if (deg[i] < k) { |
|
|
queue.offer(i); |
|
|
removed[i] = true; |
|
|
} |
|
|
} |
|
|
|
|
|
while (!queue.isEmpty()) { |
|
|
int u = queue.poll(); |
|
|
for (int v : g.adj[u]) { |
|
|
if (!removed[v]) { |
|
|
deg[v]--; |
|
|
if (deg[v] < k) { |
|
|
queue.offer(v); |
|
|
removed[v] = true; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
Set<Integer> core = new HashSet<>(); |
|
|
for (int i = 1; i <= g.n; i++) { |
|
|
if (!removed[i]) { |
|
|
core.add(i); |
|
|
} |
|
|
} |
|
|
return core; |
|
|
} |
|
|
|
|
|
static List<Set<Integer>> components(Set<Integer> nodes, List<Integer>[] adj) { |
|
|
int n = adj.length - 1; |
|
|
boolean[] in = new boolean[n + 1], vis = new boolean[n + 1]; |
|
|
for (int u : nodes) in[u] = true; |
|
|
List<Set<Integer>> comps = new ArrayList<>(); |
|
|
for (int s : nodes) if (!vis[s]) { |
|
|
Set<Integer> comp = new HashSet<>(); |
|
|
ArrayDeque<Integer> dq = new ArrayDeque<>(); |
|
|
dq.add(s); vis[s] = true; |
|
|
while (!dq.isEmpty()) { |
|
|
int u = dq.poll(); |
|
|
comp.add(u); |
|
|
for (int v : adj[u]) if (in[v] && !vis[v]) { vis[v] = true; dq.add(v); } |
|
|
} |
|
|
comps.add(comp); |
|
|
} |
|
|
return comps; |
|
|
} |
|
|
|
|
|
static EvaluationResult runDensestCharikar(SyntheticGraph g) { |
|
|
long start = System.nanoTime(); |
|
|
|
|
|
int n = g.n; |
|
|
List<Integer>[] adj = g.adj; |
|
|
|
|
|
int[] deg = new int[n + 1]; |
|
|
long m = 0L; |
|
|
for (int i = 1; i <= n; i++) { deg[i] = adj[i].size(); m += deg[i]; } |
|
|
m /= 2L; |
|
|
|
|
|
boolean[] alive = new boolean[n + 1]; |
|
|
Arrays.fill(alive, true); |
|
|
int aliveCnt = n; |
|
|
|
|
|
PriorityQueue<int[]> pq = new PriorityQueue<>(Comparator.comparingInt(a -> a[0])); |
|
|
for (int i = 1; i <= n; i++) pq.add(new int[]{deg[i], i}); |
|
|
|
|
|
double bestAvg = -1.0; |
|
|
BitSet bestMask = null; |
|
|
|
|
|
BitSet cur = new BitSet(n + 1); |
|
|
for (int i = 1; i <= n; i++) cur.set(i); |
|
|
|
|
|
while (aliveCnt > 0) { |
|
|
if (aliveCnt >= MIN_SIZE_FOR_RANKING_METHODS) { |
|
|
double avg = (aliveCnt > 0) ? (2.0 * m) / aliveCnt : 0.0; |
|
|
if (avg > bestAvg) { bestAvg = avg; bestMask = (BitSet) cur.clone(); } |
|
|
} |
|
|
|
|
|
int[] top; |
|
|
do { top = pq.poll(); } while (top != null && (!alive[top[1]] || top[0] != deg[top[1]])); |
|
|
if (top == null) break; |
|
|
int u = top[1]; |
|
|
if (!alive[u]) continue; |
|
|
|
|
|
alive[u] = false; |
|
|
cur.clear(u); |
|
|
aliveCnt--; |
|
|
for (int v : adj[u]) { |
|
|
if (alive[v]) { |
|
|
m--; |
|
|
deg[v]--; |
|
|
pq.add(new int[]{deg[v], v}); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
if (bestMask == null) { |
|
|
bestMask = new BitSet(n + 1); |
|
|
for (int i = 1; i <= n; i++) bestMask.set(i); |
|
|
} |
|
|
|
|
|
Set<Integer> out = new LinkedHashSet<>(); |
|
|
for (int i = bestMask.nextSetBit(1); i >= 0; i = bestMask.nextSetBit(i + 1)) out.add(i); |
|
|
|
|
|
long end = System.nanoTime(); |
|
|
EvaluationResult evalResult = new EvaluationResult(out, g.plantedCluster, (end - start) / 1_000_000); |
|
|
evalResult.computeStats(g.adj); |
|
|
return evalResult; |
|
|
} |
|
|
|
|
|
static EvaluationResult runPeelingMaxDensity(SyntheticGraph g) { |
|
|
final int minSize = MIN_SIZE_FOR_RANKING_METHODS; |
|
|
long start = System.nanoTime(); |
|
|
|
|
|
Set<Integer> remaining = new HashSet<>(); |
|
|
for (int i = 1; i <= g.n; i++) remaining.add(i); |
|
|
|
|
|
Set<Integer> bestSubgraph = new HashSet<>(); |
|
|
double bestDensity = -1.0; |
|
|
|
|
|
while (remaining.size() >= minSize) { |
|
|
int minDegNode = -1; |
|
|
int minDeg = Integer.MAX_VALUE; |
|
|
long internalEdges = 0; |
|
|
|
|
|
Map<Integer, Integer> internalDegrees = new HashMap<>(); |
|
|
for(int u : remaining) { |
|
|
int deg = 0; |
|
|
for(int v : g.adj[u]) if(remaining.contains(v)) deg++; |
|
|
internalDegrees.put(u, deg); |
|
|
internalEdges += deg; |
|
|
if(deg < minDeg) { |
|
|
minDeg = deg; |
|
|
minDegNode = u; |
|
|
} |
|
|
} |
|
|
internalEdges /= 2; |
|
|
|
|
|
int k = remaining.size(); |
|
|
double density = (k > 1) ? (2.0 * internalEdges) / (k * (k - 1.0)) : 0.0; |
|
|
|
|
|
if (density > bestDensity || (Math.abs(density - bestDensity) < 1e-12 && k > bestSubgraph.size())) { |
|
|
bestDensity = density; |
|
|
bestSubgraph = new HashSet<>(remaining); |
|
|
} |
|
|
|
|
|
if (minDegNode == -1) break; |
|
|
remaining.remove(minDegNode); |
|
|
} |
|
|
|
|
|
long end = System.nanoTime(); |
|
|
EvaluationResult eval = new EvaluationResult(bestSubgraph, g.plantedCluster, (end - start) / 1_000_000); |
|
|
eval.computeStats(g.adj); |
|
|
return eval; |
|
|
} |
|
|
|
|
|
static EvaluationResult runQuasiCliqueSwap(SyntheticGraph g) { |
|
|
final int minSize = MIN_SIZE_FOR_RANKING_METHODS; |
|
|
final double DENSITY_TOL = 1e-12; |
|
|
final int MAX_ITERATIONS = 100000; |
|
|
final int SWAP_CANDIDATE_K = 100; |
|
|
|
|
|
long startTime = System.nanoTime(); |
|
|
|
|
|
EvaluationResult seedResult = runDensestCharikar(g); |
|
|
Set<Integer> C = new HashSet<>(seedResult.found); |
|
|
|
|
|
boolean[] inC = new boolean[g.n + 1]; |
|
|
for (int u : C) inC[u] = true; |
|
|
|
|
|
if (C.size() < minSize) { |
|
|
PriorityQueue<int[]> pq = new PriorityQueue<>((a, b) -> b[1] - a[1]); |
|
|
for (int i = 1; i <= g.n; i++) { |
|
|
if (!inC[i]) { |
|
|
int externalDeg = 0; |
|
|
for (int neighbor : g.adj[i]) if (inC[neighbor]) externalDeg++; |
|
|
pq.add(new int[]{i, externalDeg}); |
|
|
} |
|
|
} |
|
|
while (C.size() < minSize && !pq.isEmpty()) { |
|
|
int u_to_add = pq.poll()[0]; |
|
|
C.add(u_to_add); |
|
|
inC[u_to_add] = true; |
|
|
} |
|
|
} |
|
|
|
|
|
int[] degInC = new int[g.n + 1]; |
|
|
long m = 0; |
|
|
for (int u : C) { |
|
|
for (int v : g.adj[u]) if (inC[v]) degInC[u]++; |
|
|
m += degInC[u]; |
|
|
} |
|
|
m /= 2; |
|
|
|
|
|
for (int i = 1; i <= g.n; i++) { |
|
|
if (!inC[i]) { |
|
|
for (int v : g.adj[i]) if (inC[v]) degInC[i]++; |
|
|
} |
|
|
} |
|
|
|
|
|
int iter = 0; |
|
|
while (iter++ < MAX_ITERATIONS) { |
|
|
double currentDensity = (C.size() > 1) ? (2.0 * m) / ((long)C.size() * (C.size() - 1)) : 0.0; |
|
|
double bestGain = DENSITY_TOL; |
|
|
char bestMove = ' '; |
|
|
int bestU = -1, bestV = -1; |
|
|
|
|
|
for (int v_cand = 1; v_cand <= g.n; v_cand++) { |
|
|
if (!inC[v_cand]) { |
|
|
long n_ = C.size() + 1; |
|
|
long m_ = m + degInC[v_cand]; |
|
|
double nextDensity = (n_ > 1) ? (2.0 * m_) / ((long)n_ * (n_ - 1)) : 0.0; |
|
|
if (nextDensity - currentDensity > bestGain) { |
|
|
bestGain = nextDensity - currentDensity; |
|
|
bestMove = 'A'; |
|
|
bestV = v_cand; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
if (C.size() > minSize) { |
|
|
for (int u_cand : C) { |
|
|
long n_ = C.size() - 1; |
|
|
long m_ = m - degInC[u_cand]; |
|
|
double nextDensity = (n_ > 1) ? (2.0 * m_) / ((long)n_ * (n_ - 1)) : 0.0; |
|
|
if (nextDensity - currentDensity > bestGain) { |
|
|
bestGain = nextDensity - currentDensity; |
|
|
bestMove = 'R'; |
|
|
bestU = u_cand; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
List<Integer> outsideCandidates = new ArrayList<>(); |
|
|
for(int i=1; i<=g.n; i++) if(!inC[i]) outsideCandidates.add(i); |
|
|
outsideCandidates.sort((a,b) -> Integer.compare(degInC[b], degInC[a])); |
|
|
|
|
|
List<Integer> insideCandidates = new ArrayList<>(C); |
|
|
insideCandidates.sort(Comparator.comparingInt(a -> degInC[a])); |
|
|
|
|
|
for (int i = 0; i < Math.min(SWAP_CANDIDATE_K, insideCandidates.size()); i++) { |
|
|
int u_cand = insideCandidates.get(i); |
|
|
for (int j = 0; j < Math.min(SWAP_CANDIDATE_K, outsideCandidates.size()); j++) { |
|
|
int v_cand = outsideCandidates.get(j); |
|
|
boolean edgeExists = false; |
|
|
for(int neighbor : g.adj[u_cand]) if(neighbor == v_cand) { edgeExists = true; break; } |
|
|
|
|
|
long m_ = m - degInC[u_cand] + degInC[v_cand] - (edgeExists ? 1 : 0); |
|
|
double nextDensity = (C.size() > 1) ? (2.0 * m_) / ((long)C.size() * (C.size() - 1)) : 0.0; |
|
|
|
|
|
if (nextDensity - currentDensity > bestGain) { |
|
|
bestGain = nextDensity - currentDensity; |
|
|
bestMove = 'S'; |
|
|
bestU = u_cand; |
|
|
bestV = v_cand; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
if (bestMove == ' ') break; |
|
|
|
|
|
if (bestMove == 'A') { |
|
|
m += degInC[bestV]; |
|
|
C.add(bestV); |
|
|
inC[bestV] = true; |
|
|
for (int neighbor : g.adj[bestV]) degInC[neighbor]++; |
|
|
} else if (bestMove == 'R') { |
|
|
m -= degInC[bestU]; |
|
|
C.remove(bestU); |
|
|
inC[bestU] = false; |
|
|
for (int neighbor : g.adj[bestU]) degInC[neighbor]--; |
|
|
} else if (bestMove == 'S') { |
|
|
boolean edgeExists = false; |
|
|
for(int neighbor : g.adj[bestU]) if(neighbor == bestV) { edgeExists = true; break; } |
|
|
|
|
|
m = m - degInC[bestU] + degInC[bestV] - (edgeExists ? 1 : 0); |
|
|
|
|
|
C.remove(bestU); |
|
|
inC[bestU] = false; |
|
|
for (int neighbor : g.adj[bestU]) degInC[neighbor]--; |
|
|
|
|
|
C.add(bestV); |
|
|
inC[bestV] = true; |
|
|
for (int neighbor : g.adj[bestV]) degInC[neighbor]++; |
|
|
} |
|
|
} |
|
|
|
|
|
long endTime = System.nanoTime(); |
|
|
EvaluationResult eval = new EvaluationResult(C, g.plantedCluster, (endTime - startTime) / 1_000_000); |
|
|
eval.computeStats(g.adj); |
|
|
return eval; |
|
|
} |
|
|
|
|
|
public static void main(String[] args) { |
|
|
double eps = 10; |
|
|
int numRuns = 10; |
|
|
boolean testMode = false; |
|
|
|
|
|
LinkedHashSet<String> selectedAlgs = null; |
|
|
boolean epsSet = false; |
|
|
List<Double> pOutOverride = null; |
|
|
|
|
|
for (int i = 0; i < args.length; i++) { |
|
|
String arg = args[i]; |
|
|
if ("--test-mode".equals(arg)) { |
|
|
testMode = true; |
|
|
} else if (arg.startsWith("--algs")) { |
|
|
String value = null; |
|
|
if (arg.equals("--algs")) { |
|
|
if (i + 1 < args.length) value = args[++i]; |
|
|
else { System.err.println("Missing value for --algs; ignoring and running all algorithms."); continue; } |
|
|
} else if (arg.startsWith("--algs=")) { |
|
|
value = arg.substring("--algs=".length()); |
|
|
} |
|
|
|
|
|
if (value != null) { |
|
|
LinkedHashSet<String> parsed = new LinkedHashSet<>(); |
|
|
for (String tok : value.split(",")) { |
|
|
String t = tok.trim(); |
|
|
if (t.isEmpty()) continue; |
|
|
String canon; |
|
|
if (t.equalsIgnoreCase("l-rmc") || t.equalsIgnoreCase("lrmc")) canon = "L-RMC"; |
|
|
else if (t.equalsIgnoreCase("k-core") || t.equalsIgnoreCase("kcore")) canon = "k-core"; |
|
|
else if (t.equalsIgnoreCase("densest")) canon = "Densest"; |
|
|
else if (t.equalsIgnoreCase("peeling") || t.equalsIgnoreCase("peeling-density")) canon = "Peeling"; |
|
|
else if (t.equalsIgnoreCase("quasiclique") || t.equalsIgnoreCase("quasi-clique")) canon = "QuasiClique"; |
|
|
else if (t.equalsIgnoreCase("spectral")) canon = "Spectral"; |
|
|
else { System.err.println("Unknown algorithm in --algs: " + t + "; ignoring it."); continue; } |
|
|
parsed.add(canon); |
|
|
} |
|
|
selectedAlgs = parsed; |
|
|
} |
|
|
} else if (arg.startsWith("--pout")) { |
|
|
String value = null; |
|
|
if (arg.equals("--pout")) { |
|
|
if (i + 1 < args.length) value = args[++i]; |
|
|
else { System.err.println("Missing value for --pout; ignoring and using defaults."); continue; } |
|
|
} else if (arg.startsWith("--pout=")) { |
|
|
value = arg.substring("--pout=".length()); |
|
|
} |
|
|
|
|
|
if (value != null) { |
|
|
List<Double> parsed = new ArrayList<>(); |
|
|
for (String tok : value.split(",")) { |
|
|
try { parsed.add(Double.parseDouble(tok.trim())); } |
|
|
catch (NumberFormatException e) { System.err.println("Invalid --pout value '" + tok.trim() + "'; skipping it."); } |
|
|
} |
|
|
if (!parsed.isEmpty()) pOutOverride = parsed; |
|
|
else System.err.println("No valid values parsed for --pout; using defaults."); |
|
|
} |
|
|
} else if (!arg.startsWith("--") && !epsSet) { |
|
|
try { |
|
|
eps = Double.parseDouble(arg); |
|
|
epsSet = true; |
|
|
} catch (NumberFormatException e) { |
|
|
System.err.println("Invalid epsilon value '" + arg + "'. Using default value of 10."); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
try { |
|
|
runAndWriteCsv(eps, numRuns, selectedAlgs, testMode, pOutOverride); |
|
|
} catch (IOException e) { |
|
|
throw new RuntimeException(e); |
|
|
} |
|
|
} |
|
|
|
|
|
private static void sleep3s() { |
|
|
try { Thread.sleep(3000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } |
|
|
} |
|
|
|
|
|
private static void runAndWriteCsv(double eps, int numRuns, LinkedHashSet<String> selectedAlgs, boolean testMode, List<Double> pOutOverride) throws IOException { |
|
|
final int nTotal = testMode ? 500 : 2500; |
|
|
int[] clusterSizes = testMode ? new int[]{25, 50, 75} : new int[]{100, 150, 200}; |
|
|
double[] pIns = testMode ? new double[]{0.8, 0.9} : new double[]{0.6, 0.7, 0.8, 0.9}; |
|
|
double[] pOuts; |
|
|
if (pOutOverride != null && !pOutOverride.isEmpty()) { |
|
|
pOuts = pOutOverride.stream().mapToDouble(d->d).toArray(); |
|
|
} else { |
|
|
pOuts = testMode ? new double[]{0.1, 0.15, 0.25, 0.35} : new double[]{0.25, 0.35, 0.45}; |
|
|
} |
|
|
|
|
|
String extra = ""; |
|
|
if (pOutOverride != null && !pOutOverride.isEmpty()) { |
|
|
StringJoiner sj = new StringJoiner(","); |
|
|
for (double v : pOuts) sj.add(String.format(java.util.Locale.US, "%.2f", v)); |
|
|
extra = ",pOut=" + sj; |
|
|
} |
|
|
String outPath = String.format( |
|
|
java.util.Locale.US, |
|
|
"synthetic/Table1-nTotal=%d,eps=%.1e%s.csv", |
|
|
nTotal, eps, extra); |
|
|
|
|
|
List<String> order = Arrays.asList("L-RMC", "k-core", "Densest", "QuasiClique", "Spectral"); |
|
|
LinkedHashSet<String> algsToRun = (selectedAlgs == null || selectedAlgs.isEmpty()) |
|
|
? new LinkedHashSet<>(order) |
|
|
: selectedAlgs; |
|
|
|
|
|
try (PrintWriter w = new PrintWriter(new BufferedWriter(new FileWriter(outPath)))) { |
|
|
w.println("Method,ClusterSize,InternalDensity,ExternalDensity,Precision,Recall,F1,Density,MinDeg,AvgDeg,MaxDeg,RMCScore,Runtime"); |
|
|
|
|
|
for (int k : clusterSizes) { |
|
|
for (double pIn : pIns) { |
|
|
for (double pOut : pOuts) { |
|
|
Map<String, AggregatedResult> agg = new LinkedHashMap<>(); |
|
|
for (String name : algsToRun) agg.put(name, new AggregatedResult()); |
|
|
|
|
|
for (int run = 0; run < numRuns; run++) { |
|
|
Random rand = new Random(42 + run); |
|
|
SyntheticGraph g = generatePlantedClusterGraph(nTotal, k, pIn, pOut, rand); |
|
|
|
|
|
if (algsToRun.contains("L-RMC")) { |
|
|
agg.get("L-RMC").addResult(runSingleLRMC(g, eps)); |
|
|
sleep3s(); |
|
|
} |
|
|
if (algsToRun.contains("k-core")) { |
|
|
agg.get("k-core").addResult(runKCoreDegeneracyBaseline(g)); |
|
|
sleep3s(); |
|
|
} |
|
|
if (algsToRun.contains("Densest")) { |
|
|
agg.get("Densest").addResult(runDensestCharikar(g)); |
|
|
sleep3s(); |
|
|
} |
|
|
if (algsToRun.contains("Peeling")) { |
|
|
agg.get("Peeling").addResult(runPeelingMaxDensity(g)); |
|
|
sleep3s(); |
|
|
} |
|
|
if (algsToRun.contains("QuasiClique")) { |
|
|
agg.get("QuasiClique").addResult(runQuasiCliqueSwap(g)); |
|
|
sleep3s(); |
|
|
} |
|
|
if (algsToRun.contains("Spectral")) { |
|
|
agg.get("Spectral").addResult(runSpectralBaseline(g)); |
|
|
sleep3s(); |
|
|
} |
|
|
} |
|
|
|
|
|
for (String name : order) { |
|
|
if (algsToRun.contains(name)) { |
|
|
agg.get(name).average(); |
|
|
writeAggregatedRow(w, name, k, pIn, pOut, agg.get(name)); |
|
|
} |
|
|
} |
|
|
System.out.printf("Finished instance: k=%d, pIn=%.1f, pOut=%.2f (averaged over %d runs)%n", k, pIn, pOut, numRuns); |
|
|
System.out.flush(); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
private static void writeAggregatedRow(PrintWriter w, String method, int k, double pIn, double pOut, AggregatedResult r) { |
|
|
w.printf(java.util.Locale.US, |
|
|
"%s,%d,%.1f,%.2f,%.3f,%.3f,%.3f,%.3f,%.1f,%.1f,%.1f,%d,%d%n", |
|
|
method, k, pIn, pOut, r.precision, r.recall, r.f1, r.density, r.minDeg, r.avgDeg, r.maxDeg, r.rmcScore, r.runtimeMs); |
|
|
} |
|
|
|
|
|
static SyntheticGraph generatePlantedClusterGraph(int nTotal, int clusterSize, double pIn, double pBackground, Random rand) { |
|
|
SyntheticGraph g = new SyntheticGraph(nTotal); |
|
|
for (int i = 1; i <= clusterSize; i++) g.plantedCluster.add(i); |
|
|
|
|
|
for (int i = 1; i <= clusterSize; i++) { |
|
|
for (int j = i + 1; j <= clusterSize; j++) { |
|
|
if (rand.nextDouble() < pIn) g.addEdge(i, j); |
|
|
} |
|
|
} |
|
|
|
|
|
for (int i = 1; i <= clusterSize; i++) { |
|
|
for (int j = clusterSize + 1; j <= nTotal; j++) { |
|
|
if (rand.nextDouble() < pBackground) g.addEdge(i, j); |
|
|
} |
|
|
} |
|
|
|
|
|
for (int i = clusterSize + 1; i <= nTotal; i++) { |
|
|
for (int j = i + 1; j <= nTotal; j++) { |
|
|
if (rand.nextDouble() < pBackground) g.addEdge(i, j); |
|
|
} |
|
|
} |
|
|
return g; |
|
|
} |
|
|
} |
|
|
|