/** A single account that appears in the parsed social data. */
export interface UserNode {
  /** Human-readable name shown for the account. */
  displayName: string;
  /** Account handle; `parseSocialData` stores it without a leading `@`. */
  username: string;
}
|
|
/** Output of `parseSocialData`: the relationship graph plus a parse log. */
export interface ParseResult {
  /** Follow lists keyed by the username of each account detected as a list owner. */
  relationships: Map<string, { following: UserNode[]; followers: UserNode[] }>;
  /** Every parsed account, keyed by username. */
  allUsers: Map<string, UserNode>;
  /** Timestamped, human-readable messages recorded during parsing. */
  logs: string[];
}
|
|
/**
 * Parses pasted follower/following listings into a relationship graph.
 *
 * The text is cleaned first (zero-width characters removed, runs of three or
 * more dots turned into line breaks, glued headers such as `Following12`
 * split onto their own lines) and then scanned line by line:
 *
 * - A `Following` / `Followers` header, optionally followed by a count line,
 *   selects the list being filled. The account parsed immediately before the
 *   header is treated as the owner ("center") of the entries that follow.
 * - Bare numeric lines are ignored.
 * - Any other line is read as an account: either a display-name line followed
 *   by a username line, or a single line that ends in a username.
 *
 * @param text Raw text to parse.
 * @returns Follow lists per center, every account seen, and a timestamped log.
 */
export function parseSocialData(text: string): ParseResult {
  const result: ParseResult = {
    relationships: new Map(),
    allUsers: new Map(),
    logs: [],
  };

  const addLog = (msg: string): void => {
    result.logs.push(`[${new Date().toLocaleTimeString()}] ${msg}`);
  };

  // Patterns are built once here instead of on every loop iteration.
  const headerPattern = /^(Following|Followers)$/i;
  const numericPattern = /^\d+$/;
  const usernamePattern = /^@?[a-z0-9._]+$/i;
  const trailingUsernamePattern = /^(.*?)(@?[a-z0-9._]+)$/i;
  const placeholderPattern = /^user\d+$/i;

  const rawLines = text
    .replace(/[\u200B-\u200D\uFEFF]/g, '') // zero-width characters and BOM
    .replace(/\.{3,}/g, '\n') // "..." acts as a separator
    .replace(/(Following|Followers)(\d+)/gi, '\n$1\n$2\n') // "Following12" -> header + count
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
  addLog(`Started parsing. Extracted ${rawLines.length} raw lines after cleanup.`);

  let currentSubject: string | null = null;
  let currentContext: 'following' | 'followers' | null = null;
  let lastParsedNode: UserNode | null = null;
  // The list that received `lastParsedNode`, if any. Needed to take the node
  // back out when it turns out to be a center's profile line, not an entry.
  let lastAppendedTo: UserNode[] | null = null;

  let i = 0;
  while (i < rawLines.length) {
    const line = rawLines[i];

    if (headerPattern.test(line)) {
      const newContext = line.toLowerCase() === 'following' ? 'following' : 'followers';

      // Swallow the count that may directly follow the header.
      if (i + 1 < rawLines.length && numericPattern.test(rawLines[i + 1])) {
        i++;
      }

      if (lastParsedNode) {
        // The account right before a header is the list owner. If it was
        // appended to a list as a regular entry, undo exactly that append.
        // This applies whether the owner is new or the same one switching
        // lists, and it never touches lists of unrelated centers.
        if (lastAppendedTo && lastAppendedTo[lastAppendedTo.length - 1] === lastParsedNode) {
          lastAppendedTo.pop();
        }
        lastAppendedTo = null;

        if (currentSubject !== lastParsedNode.username) {
          currentSubject = lastParsedNode.username;
          if (!result.relationships.has(currentSubject)) {
            result.relationships.set(currentSubject, { following: [], followers: [] });
          }
          addLog(`New graph center identified: ${currentSubject} (Tracking ${newContext})`);
        } else {
          addLog(`Switched context to: ${newContext} for ${currentSubject}`);
        }
      }

      currentContext = newContext;
      i++;
      continue;
    }

    // Stray counts carry no account information.
    if (numericPattern.test(line)) {
      i++;
      continue;
    }

    let displayName: string;
    let username: string;

    const nextLine = i + 1 < rawLines.length ? rawLines[i + 1] : null;
    // Headers and counts also match the username pattern, so rule them out first.
    const nextIsHeaderOrNumber =
      nextLine !== null && (headerPattern.test(nextLine) || numericPattern.test(nextLine));

    if (nextLine !== null && !nextIsHeaderOrNumber && usernamePattern.test(nextLine)) {
      // Two-line form: display name, then username.
      displayName = line;
      username = nextLine;
      i += 2;
    } else {
      // One-line form: optional display name followed by a trailing username.
      const match = trailingUsernamePattern.exec(line);
      if (match) {
        username = match[2];
        displayName = match[1].trim() || username;
      } else {
        // Nothing username-like on the line: derive a handle from the text.
        displayName = line;
        username = line.replace(/\s+/g, '').toLowerCase();
      }
      i++;
    }

    username = username.replace(/^@/, '');

    // Purely numeric or "user123"-style handles are skipped entirely.
    if (numericPattern.test(username) || placeholderPattern.test(username)) {
      continue;
    }

    const node: UserNode = { displayName, username };
    result.allUsers.set(username, node);
    lastParsedNode = node;
    lastAppendedTo = null;

    if (currentSubject && currentContext) {
      const rels = result.relationships.get(currentSubject);
      if (rels) {
        const list = rels[currentContext];
        list.push(node);
        lastAppendedTo = list;
      }
    }
  }

  return result;
}
|
|
/** Per-account graph statistics produced by `getCoreMembers`. */
export interface MemberMetrics {
  /** Account handle these numbers belong to. */
  username: string;
  /** Number of follow listings that point at this account. */
  inDegree: number;
  /** Number of follow listings that originate from this account. */
  outDegree: number;
  /** Times this account took part in a two-way follow detected within one center's lists. */
  mutuals: number;
  /** Ranking value: `inDegree + outDegree + 3 * mutuals`. */
  score: number;
  /** Set to `true` when the account looks like an alternate of a higher-ranked one. */
  isAltCandidate?: boolean;
  /** Identifier of the higher-ranked account this one resembles. */
  altOf?: string;
  /** Center whose lists mention this account most often (or the account itself for an unlisted center). */
  primaryCenter?: string;
}
|
|
/**
 * Finds a shortest directed follow-path between two accounts using
 * breadth-first search.
 *
 * Edges point from follower to followed: a center's `following` entries give
 * `center -> entry`, and its `followers` entries give `entry -> center`.
 *
 * @param data Parsed relationship data.
 * @param start Username the path begins at.
 * @param end Username the path must reach.
 * @returns Usernames from `start` to `end` inclusive, `[start]` when both are
 *   equal, or `null` when `end` cannot be reached.
 */
export function findShortestPath(data: ParseResult, start: string, end: string): string[] | null {
  if (start === end) return [start];

  const adjacency = new Map<string, Set<string>>();
  const addEdge = (from: string, to: string): void => {
    const targets = adjacency.get(from);
    if (targets) {
      targets.add(to);
    } else {
      adjacency.set(from, new Set([to]));
    }
  };

  for (const [subject, rels] of data.relationships) {
    for (const followed of rels.following) addEdge(subject, followed.username);
    for (const follower of rels.followers) addEdge(follower.username, subject);
  }

  // `queue` is only appended to and read through a moving index, which avoids
  // the linear cost of Array.prototype.shift on every dequeue. Paths are
  // rebuilt from `predecessor` once, instead of being copied per visited node.
  const predecessor = new Map<string, string>();
  const visited = new Set<string>([start]);
  const queue: string[] = [start];

  for (let head = 0; head < queue.length; head++) {
    const current = queue[head];
    for (const neighbor of adjacency.get(current) ?? []) {
      if (visited.has(neighbor)) continue;
      visited.add(neighbor);
      predecessor.set(neighbor, current);

      if (neighbor === end) {
        // Walk back to `start`, the only visited node without a predecessor.
        const path = [end];
        let step = predecessor.get(end);
        while (step !== undefined) {
          path.push(step);
          step = predecessor.get(step);
        }
        return path.reverse();
      }

      queue.push(neighbor);
    }
  }

  return null;
}
|
|
/**
 * Computes degree, mutual and score metrics for every account mentioned in
 * the relationship data, assigns each account a primary center, and flags
 * likely alternate accounts.
 *
 * Each listing is counted as it appears: an edge reported from both of its
 * endpoints' lists contributes to the degrees twice.
 *
 * Alt detection compares every pair in score order. The lower-ranked account
 * is flagged when the usernames are similar (case-insensitively) and either
 * the two `following` sets overlap (Jaccard index above 0.3) or at least one
 * of them has fewer than five known entries.
 *
 * @param data Parsed relationship data.
 * @returns Metrics for all accounts, sorted by descending score.
 */
export function getCoreMembers(data: ParseResult): MemberMetrics[] {
  const metrics = new Map<string, MemberMetrics>();

  const getOrCreate = (username: string): MemberMetrics => {
    let entry = metrics.get(username);
    if (!entry) {
      entry = { username, inDegree: 0, outDegree: 0, mutuals: 0, score: 0 };
      metrics.set(username, entry);
    }
    return entry;
  };

  // Known "following" set per center, keyed by the exact username.
  const outwardEdges = new Map<string, Set<string>>();
  for (const [subject, rels] of data.relationships) {
    outwardEdges.set(subject, new Set(rels.following.map((u) => u.username)));
  }

  // member -> (center -> number of times that center's lists mention the member)
  const centerRep = new Map<string, Map<string, number>>();
  const recordListing = (member: string, center: string): void => {
    let perCenter = centerRep.get(member);
    if (!perCenter) {
      perCenter = new Map();
      centerRep.set(member, perCenter);
    }
    perCenter.set(center, (perCenter.get(center) ?? 0) + 1);
  };

  for (const [subject, rels] of data.relationships) {
    const subjNode = getOrCreate(subject);
    const subjectFollows = outwardEdges.get(subject);

    for (const followed of rels.following) {
      subjNode.outDegree++;
      getOrCreate(followed.username).inDegree++;
      recordListing(followed.username, subject);
    }

    for (const follower of rels.followers) {
      subjNode.inDegree++;
      const sourceNode = getOrCreate(follower.username);
      sourceNode.outDegree++;
      recordListing(follower.username, subject);

      // The follower is also followed back by this center: a mutual pair.
      if (subjectFollows?.has(follower.username)) {
        subjNode.mutuals++;
        sourceNode.mutuals++;
      }
    }
  }

  for (const m of metrics.values()) {
    m.score = m.inDegree + m.outDegree + m.mutuals * 3;

    const reps = centerRep.get(m.username);
    if (reps) {
      // Pick the center with the highest count; the earliest-recorded center wins ties.
      let bestCenter: string | undefined;
      let bestCount = -Infinity;
      for (const [center, count] of reps) {
        if (count > bestCount) {
          bestCenter = center;
          bestCount = count;
        }
      }
      if (bestCenter !== undefined) m.primaryCenter = bestCenter;
    } else if (data.relationships.has(m.username)) {
      m.primaryCenter = m.username;
    }
  }

  const sorted = Array.from(metrics.values()).sort((a, b) => b.score - a.score);

  const noEdges: ReadonlySet<string> = new Set<string>();
  for (let i = 0; i < sorted.length; i++) {
    const primary = sorted[i];
    const u1 = primary.username.toLowerCase();
    if (u1.length < 3) continue;

    for (let j = i + 1; j < sorted.length; j++) {
      const candidate = sorted[j];
      const u2 = candidate.username.toLowerCase();
      if (u2.length < 3) continue;

      const isSimilar =
        u1.includes(u2) ||
        u2.includes(u1) ||
        (u1.slice(0, 5) === u2.slice(0, 5) && Math.abs(u1.length - u2.length) < 3);
      if (!isSimilar) continue;

      // Names are compared case-insensitively, but the edge map is keyed by
      // the real username, so the lookup must use the original casing.
      const s1 = outwardEdges.get(primary.username) ?? noEdges;
      const s2 = outwardEdges.get(candidate.username) ?? noEdges;
      let intersection = 0;
      s1.forEach((x) => {
        if (s2.has(x)) intersection++;
      });
      const union = s1.size + s2.size - intersection;
      const jaccard = union > 0 ? intersection / union : 0;

      const looksLikeAlt = jaccard > 0.3 || s1.size < 5 || s2.size < 5;
      if (looksLikeAlt && !candidate.isAltCandidate) {
        candidate.isAltCandidate = true;
        // Store the real username so it can be matched against other records.
        candidate.altOf = primary.username;
      }
    }
  }

  return sorted;
}
|
|
/** A group of accounts gathered around one center, as produced by `detectClusters`. */
export interface ClusterInfo {
  /** Username of the center the cluster is built around. */
  center: string;
  /** Usernames in the cluster; the center itself is the first entry. */
  members: string[];
  /** Hex color string assigned to the cluster. */
  color: string;
}
|
|
/**
 * Groups accounts into clusters around the centers found in the relationship
 * data.
 *
 * Every center starts a cluster containing itself. Each metrics entry whose
 * `primaryCenter` names a different, known center is added to that center's
 * cluster; entries pointing at unknown centers are ignored. Clusters with two
 * or fewer members are dropped and the rest are returned largest first.
 *
 * @param data Parsed relationship data; its keys define the centers.
 * @param metrics Per-account metrics carrying `primaryCenter` assignments.
 * @returns Clusters with more than two members, sorted by descending size.
 */
export function detectClusters(data: ParseResult, metrics: MemberMetrics[]): ClusterInfo[] {
  const clusters = new Map<string, string[]>();
  for (const center of data.relationships.keys()) {
    clusters.set(center, [center]);
  }

  for (const { username, primaryCenter } of metrics) {
    // Centers already head their own cluster, so self-assignments are skipped.
    if (!primaryCenter || primaryCenter === username) continue;
    clusters.get(primaryCenter)?.push(username);
  }

  const palette = ["#6366f1", "#0ea5e9", "#10b981", "#f59e0b", "#ef4444", "#8b5cf6", "#ec4899", "#14b8a6", "#f97316"];

  // Colors are assigned by center order before filtering and sorting, so a
  // center's color does not depend on how large its cluster turns out to be.
  return Array.from(clusters, ([center, members], idx) => ({
    center,
    members,
    color: palette[idx % palette.length],
  }))
    .filter((cluster) => cluster.members.length > 2)
    .sort((a, b) => b.members.length - a.members.length);
}
|
|