// Source path: knowledge-graph-preview/cli/analyzer/domain-context.js
// Hosting-page residue kept for provenance: "mr4's picture", "Upload 136 files",
// commit fd8cdf5 (verified).
import * as fs from 'node:fs';
import * as path from 'node:path';
// ── Configuration ──────────────────────────────────────────────────────────
// Hard cap on the number of entry points collected by detectEntryPoints.
const MAX_ENTRY_POINTS = 200;
// Number of top-ranked files that extractFileSignatures samples.
const MAX_SAMPLED_FILES = 40;
// Number of leading lines of each sampled file scanned for exports/imports.
const MAX_LINES_PER_FILE = 80;
// Default size budget used by truncateToFit (measured on pretty-printed JSON).
const MAX_OUTPUT_BYTES = 512 * 1024; // 512 KB
// Table of patterns scanned by detectEntryPoints. Each entry carries:
//   type        - coarse category copied onto every detected entry point
//   description - human-readable label copied onto every detected entry point
//   pattern     - regex run against the full file content
// Every pattern has the `g` flag and is therefore stateful (`lastIndex`);
// detectEntryPoints resets `lastIndex` to 0 before each scan.
const ENTRY_POINT_PATTERNS = [
// HTTP routes
{
type: 'http',
description: 'Express/Koa route',
pattern: /(?:app|router|server)\s*\.\s*(?:get|post|put|patch|delete|all|use)\s*\(\s*['"]([^'"]*?)['"]/gi,
},
{
type: 'http',
description: 'Decorator route (Flask/FastAPI/NestJS)',
pattern: /@(?:app\.)?(?:route|get|post|put|patch|delete|api_view|RequestMapping|GetMapping|PostMapping)\s*\(\s*['"]([^'"]*?)['"]/gi,
},
{
type: 'http',
description: 'Next.js/Remix route handler',
pattern: /export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b/g,
},
// CLI
{
type: 'cli',
description: 'CLI command',
pattern: /\.command\s*\(\s*['"]([\w\-:]+)['"]/g,
},
{
type: 'cli',
description: 'argparse subparser',
pattern: /add_parser\s*\(\s*['"]([\w\-]+)['"]/g,
},
// Event handlers
{
type: 'event',
description: 'Event listener',
pattern: /\.on\s*\(\s*['"]([\w\-:.]+)['"]/g,
},
{
type: 'event',
description: 'Event subscriber decorator',
pattern: /@(?:EventHandler|Subscribe|Listener|on_event)\s*\(\s*['"]([\w\-:.]+)['"]/g,
},
// Cron / scheduled
{
type: 'cron',
description: 'Cron schedule',
pattern: /@?(?:Cron|Schedule|Scheduled|crontab)\s*\(\s*['"]([^'"]+)['"]/gi,
},
// GraphQL (reported under the 'http' type; this pattern has no capture group)
{
type: 'http',
description: 'GraphQL resolver',
pattern: /@(?:Query|Mutation|Subscription|Resolver)\s*\(/g,
},
// Exported handlers (generic): functions named handle*, process* or on*
{
type: 'manual',
description: 'Exported handler',
pattern: /export\s+(?:async\s+)?function\s+(handle\w+|process\w+|on\w+)\b/g,
},
];
// Matches paths that look like test files (`.test.`, `.spec.`, `__tests__`,
// `_test.py`, `test_*.py`). detectEntryPoints skips any file whose relative
// path matches. The regex has no `g` flag, so `.test()` carries no state.
const TEST_FILE_PATTERN = /(?:\.test\.|\.spec\.|__tests__|_test\.py|test_\w+\.py)/;
// ── Metadata files ────────────────────────────────────────────────────────
// File names that extractMetadata looks up directly under the project root.
// Files that cannot be read are skipped; results are keyed by these names and
// produced in this order.
const METADATA_FILES = [
'package.json',
'Cargo.toml',
'go.mod',
'pyproject.toml',
'setup.py',
'setup.cfg',
'pom.xml',
'build.gradle',
'Gemfile',
'composer.json',
'mix.exs',
'Makefile',
'docker-compose.yml',
'docker-compose.yaml',
'README.md',
'README.rst',
'README.txt',
'README',
];
// ── Priority keywords for file signature selection ────────────────────────
// Lowercase substrings used by priorityScore: a path earns one point for each
// keyword it contains after lowercasing. Matching is by substring, so
// overlapping keywords (e.g. 'route'/'router', 'flow'/'workflow') each count.
const PRIORITY_KEYWORDS = [
'controller',
'service',
'handler',
'router',
'route',
'api',
'model',
'entity',
'repository',
'usecase',
'use_case',
'command',
'query',
'event',
'subscriber',
'listener',
'middleware',
'guard',
'interceptor',
'resolver',
'workflow',
'flow',
'process',
'pipeline',
'job',
'task',
];
// ── Entry point detection ──────────────────────────────────────────────────
/**
 * Scans files for the regexes in ENTRY_POINT_PATTERNS and records every hit.
 *
 * Files whose relative path matches TEST_FILE_PATTERN are skipped, unreadable
 * files are ignored, and scanning stops as soon as MAX_ENTRY_POINTS results
 * have been collected.
 *
 * @param targetPath - Project root (not used by the scan itself)
 * @param files - File entries exposing `path` and `relativePath`
 * @returns Array of `{ file, line, type, description, match, snippet }`
 */
export function detectEntryPoints(targetPath, files) {
    const found = [];
    const isFull = () => found.length >= MAX_ENTRY_POINTS;
    for (const entry of files) {
        if (isFull()) {
            break;
        }
        // Test files are not considered entry points.
        if (TEST_FILE_PATTERN.test(entry.relativePath)) {
            continue;
        }
        let source;
        try {
            source = fs.readFileSync(entry.path, 'utf-8');
        }
        catch {
            // Unreadable file: best-effort scan, move on.
            continue;
        }
        const sourceLines = source.split('\n');
        for (const { type, description, pattern } of ENTRY_POINT_PATTERNS) {
            if (isFull()) {
                break;
            }
            // The patterns are shared global regexes; always restart from offset 0.
            pattern.lastIndex = 0;
            for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
                if (isFull()) {
                    break;
                }
                // 1-based line number of the match start.
                const line = source.slice(0, hit.index).split('\n').length;
                // Snippet: up to five lines beginning at the matching line.
                const first = Math.max(0, line - 1);
                const last = Math.min(sourceLines.length, first + 5);
                const snippetText = sourceLines.slice(first, last).join('\n');
                found.push({
                    file: entry.relativePath,
                    line,
                    type,
                    description,
                    match: hit[0].slice(0, 120),
                    snippet: snippetText.slice(0, 300),
                });
            }
        }
    }
    return found;
}
// ── File signature extraction ─────────────────────────────────────────────
/**
 * Scores a file path by counting how many PRIORITY_KEYWORDS occur in it
 * (case-insensitive substring match, one point per keyword).
 *
 * @param filePath - Path to score
 * @returns Number of keywords contained in the lowercased path
 */
function priorityScore(filePath) {
    const haystack = filePath.toLowerCase();
    return PRIORITY_KEYWORDS.filter((keyword) => haystack.includes(keyword)).length;
}
/**
 * Builds a lightweight signature for the highest-priority files.
 *
 * Files are ranked by priorityScore of their relative path (highest first,
 * ties keep input order) and the top MAX_SAMPLED_FILES are read. Only the
 * first MAX_LINES_PER_FILE lines of each file are inspected. Files that
 * cannot be read are skipped.
 *
 * @param targetPath - Project root (not used by the extraction itself)
 * @param files - File entries exposing `path` and `relativePath`
 * @returns Array of `{ file, exports, imports, lines, preview }` where
 *   `exports`/`imports` hold at most 20 names each, `lines` is the total line
 *   count of the file and `preview` is at most 500 characters
 */
export function extractFileSignatures(targetPath, files) {
    // Score every file exactly once; the comparator then only compares cached
    // numbers instead of re-scanning the path on every comparison.
    const ranked = Array.from(files, (file) => ({
        file,
        score: priorityScore(file.relativePath),
    }));
    // Array.prototype.sort is stable, so equal scores keep their input order.
    ranked.sort((a, b) => b.score - a.score);
    const signatures = [];
    for (const { file } of ranked.slice(0, MAX_SAMPLED_FILES)) {
        let content;
        try {
            content = fs.readFileSync(file.path, 'utf-8');
        }
        catch {
            // Unreadable file: best-effort sampling, move on.
            continue;
        }
        const allLines = content.split('\n');
        const truncated = allLines.slice(0, MAX_LINES_PER_FILE).join('\n');
        // Extract exports (JS/TS)
        let exports = Array.from(
            truncated.matchAll(/export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/g),
            (m) => m[1],
        );
        // Fall back to Python-style module-level def/class only when no JS/TS
        // export was found.
        if (exports.length === 0) {
            exports = Array.from(truncated.matchAll(/^(?:def|class)\s+(\w+)/gm), (m) => m[1]);
        }
        // Extract imports (first 20): group 1 is the JS/TS module specifier,
        // group 2 the Python `from X import` module.
        const imports = Array.from(
            truncated.matchAll(/(?:import\s+.*?from\s+['"]([^'"]+)['"]|from\s+([\w.]+)\s+import)/g),
            (m) => m[1] || m[2],
        ).slice(0, 20);
        signatures.push({
            file: file.relativePath,
            exports: exports.slice(0, 20),
            imports,
            lines: allLines.length,
            preview: truncated.slice(0, 500),
        });
    }
    return signatures;
}
// ── Metadata extraction ───────────────────────────────────────────────────
/**
 * Collects project metadata from the well-known files in METADATA_FILES.
 *
 * `package.json` is parsed and summarized (name, description, script names,
 * dependency names); if parsing fails its first 500 characters are stored
 * instead. README-like files contribute their first 2000 characters, every
 * other file its first 1000 characters. Unreadable files are skipped.
 *
 * @param targetPath - Directory in which the metadata files are looked up
 * @returns Object keyed by file name
 */
export function extractMetadata(targetPath) {
    const README_SUFFIXES = ['.md', '.rst', '.txt'];
    const collected = {};
    for (const name of METADATA_FILES) {
        const location = path.join(targetPath, name);
        let text;
        try {
            text = fs.readFileSync(location, 'utf-8');
        }
        catch {
            // Missing or unreadable file: nothing to record.
            continue;
        }
        if (name === 'package.json') {
            try {
                const pkg = JSON.parse(text);
                collected['package.json'] = {
                    name: pkg.name ?? null,
                    description: pkg.description ?? null,
                    scripts: Object.keys(pkg.scripts || {}),
                    dependencies: Object.keys(pkg.dependencies || {}),
                    devDependencies: Object.keys(pkg.devDependencies || {}),
                };
            }
            catch {
                // Invalid manifest: keep a raw excerpt instead.
                collected['package.json'] = text.slice(0, 500);
            }
            continue;
        }
        const isReadmeLike = name === 'README' || README_SUFFIXES.some((suffix) => name.endsWith(suffix));
        const limit = isReadmeLike ? 2000 : 1000;
        collected[name] = text.slice(0, limit);
    }
    return collected;
}
// ── Truncation ────────────────────────────────────────────────────────────
/**
 * Progressively trims the domain context so that its pretty-printed JSON
 * (2-space indent, UTF-8) approaches `maxBytes`.
 *
 * Stages, applied in order, stopping as soon as the size fits:
 *   1. keep only the first 200 `fileTree` entries
 *   2. cut every `fileSignatures[].preview` to 200 characters
 *   3. cut every `entryPoints[].snippet` to 100 characters
 *   4. keep only the first 20 signatures and the first 100 entry points
 *
 * This is best-effort: the size is not re-checked after the last stage, so
 * the result may still exceed `maxBytes`. The context is trimmed IN PLACE and
 * the same object is returned. Missing or non-array fields and non-string
 * previews/snippets are left untouched instead of raising.
 *
 * @param context - Domain context object to trim (mutated)
 * @param maxBytes - Size budget in bytes; defaults to MAX_OUTPUT_BYTES
 * @returns The same `context` object
 */
export function truncateToFit(context, maxBytes = MAX_OUTPUT_BYTES) {
    const fits = () => Buffer.byteLength(JSON.stringify(context, null, 2), 'utf-8') <= maxBytes;
    // Shortens one string property on every element of a list, tolerating
    // absent lists and elements that do not carry the property as a string.
    const clipProperty = (items, key, limit) => {
        if (!Array.isArray(items)) {
            return;
        }
        for (const item of items) {
            if (typeof item?.[key] === 'string') {
                item[key] = item[key].slice(0, limit);
            }
        }
    };
    const stages = [
        () => {
            if (Array.isArray(context.fileTree)) {
                context.fileTree = context.fileTree.slice(0, 200);
            }
        },
        () => clipProperty(context.fileSignatures, 'preview', 200),
        () => clipProperty(context.entryPoints, 'snippet', 100),
        () => {
            if (Array.isArray(context.fileSignatures)) {
                context.fileSignatures = context.fileSignatures.slice(0, 20);
            }
            if (Array.isArray(context.entryPoints)) {
                context.entryPoints = context.entryPoints.slice(0, 100);
            }
        },
    ];
    for (const applyStage of stages) {
        if (fits()) {
            return context;
        }
        applyStage();
    }
    return context;
}
// ── Main function ─────────────────────────────────────────────────────────
/**
 * Assembles the domain context for a project: file tree, detected entry
 * points, file signatures and manifest metadata, then trims the result with
 * truncateToFit using its default size budget.
 *
 * @param targetPath - Absolute path to the project root
 * @param files - Already-scanned file entries from the analyzer
 * @returns The generated DomainContext
 */
export function generateDomainContext(targetPath, files) {
    const draft = {
        projectRoot: targetPath,
        fileCount: files.length,
        fileTree: files.map((entry) => entry.relativePath),
        entryPoints: detectEntryPoints(targetPath, files),
        fileSignatures: extractFileSignatures(targetPath, files),
        metadata: extractMetadata(targetPath),
    };
    return truncateToFit(draft);
}