knowledge-graph-preview / cli /analyzer /graph-builder.js
mr4's picture
Upload 136 files
fd8cdf5 verified
import * as path from 'node:path';
import * as posix from 'node:path/posix';
/**
 * Produces the one-line summary shown for a file node.
 *
 * The first non-empty source wins:
 * 1. Exported names — the first five, followed by ", ..." when there are more
 * 2. Class names
 * 3. Function names
 * 4. "{lineCount} lines of {language}" when nothing was parsed or every list is empty
 *
 * @param parseResult - Parse output for the file, or a falsy value when the file
 *   was not parsed; its `exports`, `classes` and `functions` arrays are read
 * @param file - Scanned file entry; only `lineCount` and `language` are read
 * @returns The summary string
 */
function generateSummary(parseResult, file) {
    const EXPORT_LIMIT = 5;
    // Built lazily so the file metadata is only touched when the fallback is used.
    const describeSize = () => `${file.lineCount} lines of ${file.language}`;
    if (!parseResult) {
        return describeSize();
    }
    const exported = parseResult.exports;
    if (exported.length > 0) {
        const listed = exported.slice(0, EXPORT_LIMIT).join(', ');
        return exported.length > EXPORT_LIMIT
            ? `Exports: ${listed}, ...`
            : `Exports: ${listed}`;
    }
    if (parseResult.classes.length > 0) {
        return `Classes: ${parseResult.classes.map(({ name }) => name).join(', ')}`;
    }
    if (parseResult.functions.length > 0) {
        return `Functions: ${parseResult.functions.map(({ name }) => name).join(', ')}`;
    }
    return describeSize();
}
/**
 * Path-based tagging rules, applied in order against the lower-cased relative path.
 *
 * Every directory rule is anchored the same way: the name must be a whole path
 * segment, i.e. preceded by the start of the path or "/" and followed by "/" or
 * the end of the path.
 *
 * The `test` rule accepts singular and plural directory names (`test`, `tests`,
 * `spec`, `specs`, and their `__name__` forms) plus `*.test.*` / `*.spec.*`
 * file names.
 */
const PATH_TAG_RULES = [
    {
        tag: 'test',
        patterns: [
            /(?:^|\/)(?:tests?|specs?|__tests?__|__specs?__)(?:\/|$)/,
            /\.(?:test|spec)\.[^/]+$/,
        ],
    },
    { tag: 'component', patterns: [/(?:^|\/)components?(?:\/|$)/] },
    { tag: 'util', patterns: [/(?:^|\/)(?:utils?|lib|helpers?)(?:\/|$)/] },
    { tag: 'api', patterns: [/(?:^|\/)(?:api|routes?|controllers?)(?:\/|$)/] },
    { tag: 'model', patterns: [/(?:^|\/)(?:models?|entities)(?:\/|$)/] },
];
/**
 * Assigns tags to a file node based on language, category, path patterns, and size.
 *
 * The result always starts with `file.language` and `file.category`, followed by
 * any matching path tags (`test`, `component`, `util`, `api`, `model`, in that
 * order) and exactly one size tag:
 * - `simple`   — fewer than 50 lines
 * - `moderate` — 50 to 200 lines
 * - `complex`  — anything else
 *
 * @param file - Scanned file entry; `language`, `category`, `relativePath` and
 *   `lineCount` are read. Path matching uses "/" as the separator.
 * @returns Array of tag strings
 */
function assignTags(file) {
    const tags = [file.language, file.category];
    // Matching is case-insensitive: patterns are lower-case, so lower-case the path once.
    const relativeLower = file.relativePath.toLowerCase();
    for (const { tag, patterns } of PATH_TAG_RULES) {
        if (patterns.some((pattern) => pattern.test(relativeLower))) {
            tags.push(tag);
        }
    }
    // Size tag based on line count
    if (file.lineCount < 50) {
        tags.push('simple');
    }
    else if (file.lineCount <= 200) {
        tags.push('moderate');
    }
    else {
        tags.push('complex');
    }
    return tags;
}
/**
 * Creates one file-level graph node per scanned file.
 *
 * Each node carries:
 * - id: the file's relative path
 * - type: "file"
 * - name: the basename of the relative path
 * - summary: the result of `generateSummary` for the file's parse result
 * - tags: the result of `assignTags` for the file
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns Array of DashboardNode, in the same order as `files`
 */
export function buildFileNodes(files, parseResults) {
    const nodes = [];
    for (const entry of files) {
        const { relativePath } = entry;
        // Files without a parse result pass `undefined` to the summary helper.
        const parsed = parseResults.get(relativePath);
        nodes.push({
            id: relativePath,
            type: 'file',
            name: path.basename(relativePath),
            summary: generateSummary(parsed, entry),
            tags: assignTags(entry),
        });
    }
    return nodes;
}
/**
 * Creates function-level graph nodes for every significant function, together
 * with a `contains` edge from the owning file to each of them.
 *
 * A function is significant when it is exported OR spans at least 10 lines
 * (endLine - startLine + 1 >= 10).
 *
 * Each node carries:
 * - id: "{relativePath}::{functionName}"
 * - type: "function"
 * - name: the function name
 * - summary: "({params}) → {lineCount} lines"
 * - tags: the file's language, "exported" when exported, and "async" when the
 *   name contains "async" or "Async" (a name-based heuristic only)
 *
 * Files without a parse result are skipped.
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns Object with `nodes` and `edges` arrays
 */
export function buildFunctionNodes(files, parseResults) {
    const MIN_SIGNIFICANT_LINES = 10;
    const nodes = [];
    const edges = [];
    for (const { relativePath, language } of files) {
        const parsed = parseResults.get(relativePath);
        if (!parsed) {
            continue;
        }
        for (const fn of parsed.functions) {
            const span = fn.endLine - fn.startLine + 1;
            if (!fn.isExported && !(span >= MIN_SIGNIFICANT_LINES)) {
                continue;
            }
            const looksAsync = fn.name.includes('async') || fn.name.includes('Async');
            const id = `${relativePath}::${fn.name}`;
            nodes.push({
                id,
                type: 'function',
                name: fn.name,
                summary: `(${fn.params.join(', ')}) → ${span} lines`,
                tags: [
                    language,
                    ...(fn.isExported ? ['exported'] : []),
                    ...(looksAsync ? ['async'] : []),
                ],
            });
            edges.push({ source: relativePath, target: id, type: 'contains' });
        }
    }
    return { nodes, edges };
}
/**
 * Creates a class-level graph node for every class in the parse results, together
 * with a `contains` edge from the owning file to each of them.
 *
 * Each node carries:
 * - id: "{relativePath}::{className}"
 * - type: "class"
 * - name: the class name
 * - summary: "Methods: {methods}", or "Empty class" when there are no methods
 * - tags: the file's language, "exported" when exported, and a size tag:
 *   "small-class" (up to 3 methods), "medium-class" (4 to 10), "large-class" (more)
 *
 * Files without a parse result are skipped.
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns Object with `nodes` and `edges` arrays
 */
export function buildClassNodes(files, parseResults) {
    const sizeTagFor = (methodCount) => {
        if (methodCount <= 3) {
            return 'small-class';
        }
        return methodCount <= 10 ? 'medium-class' : 'large-class';
    };
    const nodes = [];
    const edges = [];
    for (const { relativePath, language } of files) {
        const parsed = parseResults.get(relativePath);
        if (!parsed) {
            continue;
        }
        for (const { name, isExported, methods } of parsed.classes) {
            const id = `${relativePath}::${name}`;
            const tags = isExported ? [language, 'exported'] : [language];
            tags.push(sizeTagFor(methods.length));
            let summary = 'Empty class';
            if (methods.length > 0) {
                summary = `Methods: ${methods.join(', ')}`;
            }
            nodes.push({ id, type: 'class', name, summary, tags });
            edges.push({ source: relativePath, target: id, type: 'contains' });
        }
    }
    return { nodes, edges };
}
/**
 * Common file extensions to try when resolving import paths.
 * Tried in this order; the first match wins.
 */
const RESOLVE_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.py', '.go', '.rs', '.java'];
/**
 * Index file names to try for directory imports.
 *
 * Covers every JavaScript/TypeScript extension listed in RESOLVE_EXTENSIONS so
 * that a directory backed by e.g. `index.tsx` resolves as well. `index.ts` and
 * `index.js` stay first so directories that contain them resolve as before.
 */
const INDEX_FILES = ['index.ts', 'index.js', 'index.tsx', 'index.jsx', 'index.mjs'];
/**
 * Resolves a relative import path to a known file's relativePath.
 *
 * Resolution strategies, in order (first match wins):
 * 1. Exact path (the import already names a known file)
 * 2. Path + each entry of RESOLVE_EXTENSIONS
 * 3. Path as a directory + each entry of INDEX_FILES
 *
 * Paths are handled with POSIX semantics ("/" separators) regardless of platform.
 *
 * @param importSource - The raw import path (e.g., './utils' or '../lib/helper')
 * @param importingFilePath - The relativePath of the file containing the import
 * @param knownFiles - Set of all known file relative paths
 * @returns The resolved relativePath or undefined if not found
 */
function resolveImportPath(importSource, importingFilePath, knownFiles) {
    // posix.join already normalizes its result ("." and ".." segments are
    // collapsed), so no separate normalize step is needed.
    const resolved = posix.join(posix.dirname(importingFilePath), importSource);
    if (knownFiles.has(resolved)) {
        return resolved;
    }
    for (const ext of RESOLVE_EXTENSIONS) {
        const withExt = resolved + ext;
        if (knownFiles.has(withExt)) {
            return withExt;
        }
    }
    for (const indexFile of INDEX_FILES) {
        const asIndex = posix.join(resolved, indexFile);
        if (knownFiles.has(asIndex)) {
            return asIndex;
        }
    }
    return undefined;
}
/**
 * Builds import edges from resolved relative import paths.
 *
 * For each file's imports:
 * - Only processes imports whose source starts with '.' (relative paths)
 * - Resolves the import path relative to the importing file's directory
 * - If the resolved path matches a known file, creates an 'imports' edge
 * - Skips external package imports and unresolvable imports
 * - Emits at most one edge per (importing file, target file) pair, even when the
 *   file imports the same module in several statements; the edge carries no
 *   per-statement data, so repeats would be exact duplicates
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns Array of DashboardEdge with type 'imports'
 */
export function buildImportEdges(files, parseResults) {
    const edges = [];
    const knownFiles = new Set(files.map((f) => f.relativePath));
    for (const file of files) {
        const parseResult = parseResults.get(file.relativePath);
        if (!parseResult) {
            continue;
        }
        // Targets already linked from this file; keeps the first occurrence's position.
        const linkedTargets = new Set();
        for (const imp of parseResult.imports) {
            // Skip external package imports (not starting with '.')
            if (!imp.source.startsWith('.')) {
                continue;
            }
            const resolvedPath = resolveImportPath(imp.source, file.relativePath, knownFiles);
            if (!resolvedPath || linkedTargets.has(resolvedPath)) {
                continue;
            }
            linkedTargets.add(resolvedPath);
            edges.push({
                source: file.relativePath,
                target: resolvedPath,
                type: 'imports',
            });
        }
    }
    return edges;
}
/**
 * Derives 'calls' edges from named imports — a best-effort heuristic.
 *
 * An edge from the importing file to a function node is emitted when:
 * - the import source starts with '.' and resolves to a known file, and
 * - one of the import's names equals the name of a significant function in that
 *   file (exported, or spanning at least 10 lines)
 *
 * The import itself is treated as evidence of a call; no call sites are
 * inspected, so some real calls are missed and an imported-but-unused function
 * still produces an edge.
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns Array of DashboardEdge with type 'calls'
 */
export function buildCallEdges(files, parseResults) {
    const knownFiles = new Set(files.map(({ relativePath }) => relativePath));
    // Pass 1: collect the ids of all significant functions for O(1) lookup.
    const significantIds = new Set();
    for (const { relativePath } of files) {
        const parsed = parseResults.get(relativePath);
        if (parsed) {
            for (const fn of parsed.functions) {
                const span = fn.endLine - fn.startLine + 1;
                if (fn.isExported || span >= 10) {
                    significantIds.add(`${relativePath}::${fn.name}`);
                }
            }
        }
    }
    // Pass 2: match named imports against those ids.
    const edges = [];
    for (const { relativePath: importer } of files) {
        const parsed = parseResults.get(importer);
        if (!parsed) {
            continue;
        }
        for (const { source, names } of parsed.imports) {
            // External packages never start with '.'
            const targetFile = source.startsWith('.')
                ? resolveImportPath(source, importer, knownFiles)
                : undefined;
            if (!targetFile) {
                continue;
            }
            names
                .map((name) => `${targetFile}::${name}`)
                .filter((id) => significantIds.has(id))
                .forEach((id) => {
                    edges.push({ source: importer, target: id, type: 'calls' });
                });
        }
    }
    return edges;
}
/**
 * Builds graph nodes and edges from scanned files and their parse results.
 *
 * Combines the output of the individual builders:
 * - nodes: file nodes, then function nodes, then class nodes
 * - edges: file → function containment, file → class containment, import edges
 *   between files, then call edges
 *
 * Which functions and classes receive a node is decided by the respective
 * builder, not here.
 *
 * @param files - Scanned file entries with metadata
 * @param parseResults - Map of file relative path to its parse result
 * @returns GraphOutput with all generated nodes and edges
 */
export function buildGraph(files, parseResults) {
    const fileNodes = buildFileNodes(files, parseResults);
    const functionResult = buildFunctionNodes(files, parseResults);
    const classResult = buildClassNodes(files, parseResults);
    const importEdges = buildImportEdges(files, parseResults);
    const callEdges = buildCallEdges(files, parseResults);
    // Spread inside array literals rather than `push(...items)`: spreading into a
    // call passes every element as an argument and can throw a RangeError on very
    // large graphs, whereas array-literal spread has no such limit.
    return {
        nodes: [...fileNodes, ...functionResult.nodes, ...classResult.nodes],
        edges: [
            ...functionResult.edges,
            ...classResult.edges,
            ...importEdges,
            ...callEdges,
        ],
    };
}