/** A single account as it appears in a pasted following/followers list. */
export interface UserNode {
  displayName: string;
  username: string;
}

/** Output of {@link parseSocialData}. */
export interface ParseResult {
  /** Following / follower lists, keyed by the username of each detected graph center. */
  relationships: Map<string, { following: UserNode[]; followers: UserNode[] }>;
  /** Every parsed account keyed by username; a later occurrence overwrites an earlier one. */
  allUsers: Map<string, UserNode>;
  /** Timestamped trace of the parser's decisions. */
  logs: string[];
}

// Line classifiers, hoisted so they are not rebuilt on every loop iteration.
const HEADER_LINE = /^(Following|Followers)$/i;
const NUMERIC_LINE = /^\d+$/;
const USERNAME_LINE = /^@?[a-z0-9._]+$/i;
const GLUED_NAME_AND_USERNAME = /^(.*?)(@?[a-z0-9._]+)$/i;
const GENERIC_BOT_USERNAME = /^user\d+$/i;

/**
 * Parses free-form text copied from "Following"/"Followers" lists into a
 * relationship graph.
 *
 * The parser is heuristic:
 * - a `Following` / `Followers` header (optionally followed by a count) makes
 *   the most recently parsed account the current graph center, unless that
 *   account already is the center, in which case only the context switches;
 * - an entry is either two lines (display name, then username) or one line
 *   where the username is glued to the end of the display name;
 * - purely numeric usernames, `user<digits>` usernames and empty usernames
 *   are discarded.
 *
 * @param text Raw pasted text.
 * @returns The relationships per graph center, all accounts seen, and a log.
 */
export function parseSocialData(text: string): ParseResult {
  const result: ParseResult = {
    relationships: new Map(),
    allUsers: new Map(),
    logs: [],
  };

  const addLog = (msg: string): void => {
    result.logs.push(`[${new Date().toLocaleTimeString()}] ${msg}`);
  };

  // 1. Clean text: drop zero-width characters, turn runs of dots into line
  //    breaks, and force "Following123"-style headers onto their own lines.
  const rawLines = text
    .replace(/[\u200B-\u200D\uFEFF]/g, '')
    .replace(/\.{3,}/g, '\n')
    .replace(/(Following|Followers)(\d+)/gi, '\n$1\n$2\n')
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 0);

  addLog(`Started parsing. Extracted ${rawLines.length} raw lines after cleanup.`);

  let currentSubject: string | null = null;
  let currentContext: 'following' | 'followers' | null = null;
  let lastParsedNode: UserNode | null = null;

  let i = 0;
  while (i < rawLines.length) {
    const line = rawLines[i];

    // Header line: decides which list subsequent entries are appended to.
    if (HEADER_LINE.test(line)) {
      const newContext = line.toLowerCase() === 'following' ? 'following' : 'followers';

      if (i + 1 < rawLines.length && NUMERIC_LINE.test(rawLines[i + 1])) {
        i++; // Skip the count line
      }

      if (lastParsedNode) {
        const subjectName: string = lastParsedNode.username;
        if (currentSubject !== subjectName) {
          currentSubject = subjectName;
          if (!result.relationships.has(subjectName)) {
            result.relationships.set(subjectName, { following: [], followers: [] });
          }
          addLog(`New graph center identified: ${currentSubject} (Tracking ${newContext})`);

          // The new center was parsed as an ordinary entry a moment ago, so it
          // may sit at the tail of a previous center's list; take it back out.
          for (const rels of result.relationships.values()) {
            for (const list of [rels.following, rels.followers]) {
              if (list.length > 0 && list[list.length - 1].username === subjectName) {
                list.pop();
              }
            }
          }
        } else {
          addLog(`Switched context to: ${newContext} for ${currentSubject}`);
        }
      }

      currentContext = newContext;
      i++;
      continue;
    }

    // Ignore stray numbers
    if (NUMERIC_LINE.test(line)) {
      i++;
      continue;
    }

    let displayName = '';
    let username = '';

    const nextLine = i + 1 < rawLines.length ? rawLines[i + 1] : null;
    const nextIsUsername =
      nextLine !== null &&
      !HEADER_LINE.test(nextLine) &&
      !NUMERIC_LINE.test(nextLine) &&
      USERNAME_LINE.test(nextLine);

    if (nextLine !== null && nextIsUsername) {
      // 2-line structure: display name followed by username.
      displayName = line;
      username = nextLine;
      i += 2;
    } else {
      // 1-line structure (glued or standalone)
      const match = GLUED_NAME_AND_USERNAME.exec(line);
      if (match && match[2].length > 0) {
        username = match[2];
        displayName = match[1].trim() || username;
      } else {
        displayName = line;
        username = line.replace(/\s+/g, '').toLowerCase();
      }
      i++;
    }

    username = username.replace(/^@/, '');

    // Discard entries with no usable username (e.g. a lone "@"), purely
    // numeric edge cases, and generic bots.
    if (
      username.length === 0 ||
      NUMERIC_LINE.test(username) ||
      GENERIC_BOT_USERNAME.test(username)
    ) {
      continue;
    }

    const node: UserNode = { displayName, username };
    result.allUsers.set(username, node);
    lastParsedNode = node;

    if (currentSubject && currentContext) {
      const rels = result.relationships.get(currentSubject);
      if (rels) {
        rels[currentContext].push(node);
      }
    }
  }

  return result;
}

/** Per-account graph statistics produced by {@link getCoreMembers}. */
export interface MemberMetrics {
  username: string;
  inDegree: number;
  outDegree: number;
  mutuals: number;
  score: number;
  isAltCandidate?: boolean;
  altOf?: string;
  primaryCenter?: string;
}

/**
 * Breadth-first search for a shortest directed path of "follows" edges.
 *
 * Edges are `center -> account` for every entry in a center's `following`
 * list and `account -> center` for every entry in its `followers` list.
 *
 * @param data Parsed graph.
 * @param start Username to start from.
 * @param end Username to reach.
 * @returns Usernames from `start` to `end` inclusive, or `null` if unreachable.
 */
export function findShortestPath(data: ParseResult, start: string, end: string): string[] | null {
  if (start === end) return [start];

  // Adjacency list from all relationships
  const adj = new Map<string, Set<string>>();
  const addEdge = (u: string, v: string): void => {
    let targets = adj.get(u);
    if (!targets) {
      targets = new Set<string>();
      adj.set(u, targets);
    }
    targets.add(v);
  };

  data.relationships.forEach((rels, subject) => {
    rels.following.forEach((u) => addEdge(subject, u.username));
    rels.followers.forEach((u) => addEdge(u.username, subject));
  });

  // The queue is only appended to and read through a moving index, which
  // avoids the O(n) cost of Array.prototype.shift on every dequeue.
  const cameFrom = new Map<string, string>();
  const visited = new Set<string>([start]);
  const queue: string[] = [start];

  for (let head = 0; head < queue.length; head++) {
    const node = queue[head];
    const neighbors = adj.get(node);
    if (!neighbors) continue;

    for (const neighbor of neighbors) {
      if (visited.has(neighbor)) continue;
      visited.add(neighbor);
      cameFrom.set(neighbor, node);

      if (neighbor === end) {
        // Walk the predecessor chain back to the start.
        const path: string[] = [end];
        let cursor = end;
        while (cursor !== start) {
          const prev = cameFrom.get(cursor);
          if (prev === undefined) return null;
          path.push(prev);
          cursor = prev;
        }
        return path.reverse();
      }

      queue.push(neighbor);
    }
  }

  return null;
}

/**
 * Computes degree-based metrics for every account in the graph and flags
 * likely alternate accounts.
 *
 * - `score` is `inDegree + outDegree + mutuals * 3`.
 * - `primaryCenter` is the center whose lists mention the account most often
 *   (first one wins on ties); a center that is never listed by another center
 *   is its own primary center.
 * - An account is flagged as an alt of a higher-ranked account when their
 *   usernames are similar and either their `following` sets overlap
 *   (Jaccard > 0.3) or one of them follows fewer than five accounts.
 *
 * @param data Parsed graph.
 * @returns Metrics sorted by descending score.
 */
export function getCoreMembers(data: ParseResult): MemberMetrics[] {
  const metrics = new Map<string, MemberMetrics>();

  const getOrCreate = (username: string): MemberMetrics => {
    let entry = metrics.get(username);
    if (!entry) {
      entry = { username, inDegree: 0, outDegree: 0, mutuals: 0, score: 0 };
      metrics.set(username, entry);
    }
    return entry;
  };

  // center -> set of usernames the center follows, for quick mutual lookup
  const outwardEdges = new Map<string, Set<string>>();
  for (const [subject, rels] of data.relationships) {
    outwardEdges.set(subject, new Set(rels.following.map((u) => u.username)));
  }

  // account -> (center -> number of times the center's lists mention it)
  const centerRep = new Map<string, Map<string, number>>();
  const creditCenter = (member: string, center: string): void => {
    let perCenter = centerRep.get(member);
    if (!perCenter) {
      perCenter = new Map<string, number>();
      centerRep.set(member, perCenter);
    }
    perCenter.set(center, (perCenter.get(center) ?? 0) + 1);
  };

  // Calculate degrees and center reps
  for (const [subject, rels] of data.relationships) {
    const subjNode = getOrCreate(subject);
    const subjectOut = outwardEdges.get(subject);

    for (const u of rels.following) {
      subjNode.outDegree++;
      getOrCreate(u.username).inDegree++;
      creditCenter(u.username, subject);
    }

    for (const u of rels.followers) {
      subjNode.inDegree++;
      const sourceNode = getOrCreate(u.username);
      sourceNode.outDegree++;
      creditCenter(u.username, subject);

      if (subjectOut?.has(u.username)) {
        subjNode.mutuals++;
        sourceNode.mutuals++;
      }
    }
  }

  // Simplified centrality score: degree + (mutuals * 3)
  for (const m of metrics.values()) {
    m.score = m.inDegree + m.outDegree + m.mutuals * 3;

    const reps = centerRep.get(m.username);
    if (reps) {
      // Strict '>' keeps the first-inserted center on ties.
      let bestCenter: string | undefined;
      let bestCount = -Infinity;
      for (const [center, count] of reps) {
        if (count > bestCount) {
          bestCount = count;
          bestCenter = center;
        }
      }
      if (bestCenter !== undefined) m.primaryCenter = bestCenter;
    } else if (data.relationships.has(m.username)) {
      m.primaryCenter = m.username;
    }
  }

  const sorted = Array.from(metrics.values()).sort((a, b) => b.score - a.score);

  // Alt detection heuristic
  const noEdges = new Set<string>();
  for (let i = 0; i < sorted.length; i++) {
    const primary = sorted[i];
    const u1 = primary.username.toLowerCase();
    if (u1.length < 3) continue;

    for (let j = i + 1; j < sorted.length; j++) {
      const candidate = sorted[j];
      const u2 = candidate.username.toLowerCase();
      if (u2.length < 3) continue;

      // Substring match or common prefix with similar length
      const isSimilar =
        u1.includes(u2) ||
        u2.includes(u1) ||
        (u1.slice(0, 5) === u2.slice(0, 5) && Math.abs(u1.length - u2.length) < 3);
      if (!isSimilar) continue;

      // Names are compared case-insensitively, but the edge map is keyed by
      // the real username, so look up with the original casing.
      const s1 = outwardEdges.get(primary.username) ?? noEdges;
      const s2 = outwardEdges.get(candidate.username) ?? noEdges;

      let intersection = 0;
      s1.forEach((x) => {
        if (s2.has(x)) intersection++;
      });
      const union = s1.size + s2.size - intersection;
      const jaccard = union > 0 ? intersection / union : 0;

      const tooLittleData = s1.size < 5 || s2.size < 5;
      if ((jaccard > 0.3 || tooLittleData) && !candidate.isAltCandidate) {
        candidate.isAltCandidate = true;
        // Store the real username so it can be matched against MemberMetrics.username.
        candidate.altOf = primary.username;
      }
    }
  }

  return sorted;
}

/** A graph center together with the accounts assigned to it. */
export interface ClusterInfo {
  center: string;
  members: string[];
  color: string;
}

/**
 * Groups accounts around their primary center.
 *
 * Every center starts as a cluster containing itself; each other account joins
 * the cluster of its `primaryCenter`. Colors are assigned by center order
 * before clusters with two or fewer members are dropped.
 *
 * @param data Parsed graph (supplies the centers).
 * @param metrics Output of {@link getCoreMembers}.
 * @returns Clusters with more than two members, largest first.
 */
export function detectClusters(data: ParseResult, metrics: MemberMetrics[]): ClusterInfo[] {
  const clusters = new Map<string, string[]>();
  for (const center of data.relationships.keys()) {
    clusters.set(center, [center]);
  }

  for (const m of metrics) {
    if (!m.primaryCenter || m.primaryCenter === m.username) continue;
    clusters.get(m.primaryCenter)?.push(m.username);
  }

  const colors = ["#6366f1", "#0ea5e9", "#10b981", "#f59e0b", "#ef4444", "#8b5cf6", "#ec4899", "#14b8a6", "#f97316"];

  return Array.from(clusters.entries())
    .map(([center, members], idx) => ({ center, members, color: colors[idx % colors.length] }))
    .filter((c) => c.members.length > 2)
    .sort((a, b) => b.members.length - a.members.length);
}