import * as fs from 'node:fs';
import * as path from 'node:path';

// ── Configuration ──────────────────────────────────────────────────────────

const MAX_ENTRY_POINTS = 200;
const MAX_SAMPLED_FILES = 40;
const MAX_LINES_PER_FILE = 80;
const MAX_OUTPUT_BYTES = 512 * 1024; // 512 KB

// Character budgets used when copying raw manifest/README text into metadata.
const README_PREVIEW_CHARS = 2000;
const MANIFEST_PREVIEW_CHARS = 1000;
const PACKAGE_JSON_FALLBACK_CHARS = 500;

// Every pattern carries the `g` flag because detectEntryPoints() scans with
// String.prototype.matchAll, which requires a global regex.
const ENTRY_POINT_PATTERNS = [
  // HTTP routes
  {
    type: 'http',
    description: 'Express/Koa route',
    pattern: /(?:app|router|server)\s*\.\s*(?:get|post|put|patch|delete|all|use)\s*\(\s*['"]([^'"]*?)['"]/gi,
  },
  {
    type: 'http',
    description: 'Decorator route (Flask/FastAPI/NestJS)',
    pattern: /@(?:app\.)?(?:route|get|post|put|patch|delete|api_view|RequestMapping|GetMapping|PostMapping)\s*\(\s*['"]([^'"]*?)['"]/gi,
  },
  {
    type: 'http',
    description: 'Next.js/Remix route handler',
    pattern: /export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b/g,
  },
  // CLI
  {
    type: 'cli',
    description: 'CLI command',
    pattern: /\.command\s*\(\s*['"]([\w\-:]+)['"]/g,
  },
  {
    type: 'cli',
    description: 'argparse subparser',
    pattern: /add_parser\s*\(\s*['"]([\w\-]+)['"]/g,
  },
  // Event handlers
  {
    type: 'event',
    description: 'Event listener',
    pattern: /\.on\s*\(\s*['"]([\w\-:.]+)['"]/g,
  },
  {
    type: 'event',
    description: 'Event subscriber decorator',
    pattern: /@(?:EventHandler|Subscribe|Listener|on_event)\s*\(\s*['"]([\w\-:.]+)['"]/g,
  },
  // Cron / scheduled
  {
    type: 'cron',
    description: 'Cron schedule',
    pattern: /@?(?:Cron|Schedule|Scheduled|crontab)\s*\(\s*['"]([^'"]+)['"]/gi,
  },
  // GraphQL
  {
    type: 'http',
    description: 'GraphQL resolver',
    pattern: /@(?:Query|Mutation|Subscription|Resolver)\s*\(/g,
  },
  // Exported handlers (generic)
  {
    type: 'manual',
    description: 'Exported handler',
    pattern: /export\s+(?:async\s+)?function\s+(handle\w+|process\w+|on\w+)\b/g,
  },
];

// Regex to identify test files (deliberately NOT global, so .test() is stateless)
const TEST_FILE_PATTERN = /(?:\.test\.|\.spec\.|__tests__|_test\.py|test_\w+\.py)/;

// ── Metadata files ────────────────────────────────────────────────────────

const METADATA_FILES = [
  'package.json',
  'Cargo.toml',
  'go.mod',
  'pyproject.toml',
  'setup.py',
  'setup.cfg',
  'pom.xml',
  'build.gradle',
  'Gemfile',
  'composer.json',
  'mix.exs',
  'Makefile',
  'docker-compose.yml',
  'docker-compose.yaml',
  'README.md',
  'README.rst',
  'README.txt',
  'README',
];

// ── Priority keywords for file signature selection ────────────────────────

const PRIORITY_KEYWORDS = [
  'controller', 'service', 'handler', 'router', 'route', 'api',
  'model', 'entity', 'repository', 'usecase', 'use_case',
  'command', 'query', 'event', 'subscriber', 'listener',
  'middleware', 'guard', 'interceptor', 'resolver',
  'workflow', 'flow', 'process', 'pipeline', 'job', 'task',
];

// ── Shared helpers ────────────────────────────────────────────────────────

/**
 * Reads a file as UTF-8 text, returning null when it cannot be read.
 *
 * Scanning is best-effort: a file that cannot be read (for any reason) is
 * skipped by the callers rather than aborting the whole scan.
 *
 * @param {string} filePath - Path handed to fs.readFileSync.
 * @returns {string|null} The file content, or null on any read error.
 */
function readTextFile(filePath) {
  try {
    return fs.readFileSync(filePath, 'utf-8');
  } catch {
    return null;
  }
}

/**
 * Returns the offset at which each line of `text` starts (lines split on '\n').
 *
 * @param {string} text
 * @returns {number[]} Ascending offsets; the first entry is always 0.
 */
function computeLineStarts(text) {
  const starts = [0];
  let newlineAt = text.indexOf('\n');
  while (newlineAt !== -1) {
    starts.push(newlineAt + 1);
    newlineAt = text.indexOf('\n', newlineAt + 1);
  }
  return starts;
}

/**
 * Maps a character offset to its 1-based line number via binary search.
 *
 * @param {number[]} lineStarts - Result of computeLineStarts().
 * @param {number} offset - Character offset into the same text.
 * @returns {number} 1-based line number containing `offset`.
 */
function lineNumberAt(lineStarts, offset) {
  let low = 0;
  let high = lineStarts.length - 1;
  // Find the last line whose start is <= offset.
  while (low < high) {
    const mid = Math.ceil((low + high) / 2);
    if (lineStarts[mid] <= offset) {
      low = mid;
    } else {
      high = mid - 1;
    }
  }
  return low + 1;
}

// ── Entry point detection ──────────────────────────────────────────────────

/**
 * Detects entry points in the given files by scanning for known patterns.
 *
 * Files whose relative path matches TEST_FILE_PATTERN and files that cannot
 * be read are skipped. Results are ordered by file, then by pattern (in
 * ENTRY_POINT_PATTERNS order), then by position in the file, and are capped
 * at MAX_ENTRY_POINTS in total.
 *
 * @param {string} targetPath - Project root; not used by the scan itself,
 *   kept for signature compatibility.
 * @param {Array<{path: string, relativePath: string}>} files - Files to scan.
 * @returns {Array<{file: string, line: number, type: string,
 *   description: string, match: string, snippet: string}>} Detected entry
 *   points; `line` is 1-based.
 */
export function detectEntryPoints(targetPath, files) {
  const entryPoints = [];

  for (const file of files) {
    if (entryPoints.length >= MAX_ENTRY_POINTS) {
      break;
    }
    // Skip test files
    if (TEST_FILE_PATTERN.test(file.relativePath)) {
      continue;
    }

    const content = readTextFile(file.path);
    if (content === null) {
      continue;
    }

    const lines = content.split('\n');
    // Computed once per file so each match resolves its line in O(log n)
    // instead of re-splitting the whole prefix of the file.
    const lineStarts = computeLineStarts(content);

    for (const { type, description, pattern } of ENTRY_POINT_PATTERNS) {
      if (entryPoints.length >= MAX_ENTRY_POINTS) {
        break;
      }

      // matchAll iterates over a private copy of the regex, so the shared
      // module-level pattern's lastIndex is never advanced.
      for (const match of content.matchAll(pattern)) {
        if (entryPoints.length >= MAX_ENTRY_POINTS) {
          break;
        }

        const lineNo = lineNumberAt(lineStarts, match.index);

        // Snippet: the matching line plus up to four lines after it.
        const startLine = Math.max(0, lineNo - 1);
        const endLine = Math.min(lines.length, startLine + 5);
        const snippet = lines.slice(startLine, endLine).join('\n');

        entryPoints.push({
          file: file.relativePath,
          line: lineNo,
          type,
          description,
          match: match[0].slice(0, 120),
          snippet: snippet.slice(0, 300),
        });
      }
    }
  }

  return entryPoints;
}

// ── File signature extraction ─────────────────────────────────────────────

/**
 * Computes a priority score for a file path based on business-logic keywords.
 *
 * The score is the number of PRIORITY_KEYWORDS that occur (case-insensitively)
 * as a substring of the path. Overlapping keywords each count, e.g. a path
 * containing "router" scores for both "router" and "route".
 *
 * @param {string} filePath
 * @returns {number} Number of matching keywords.
 */
function priorityScore(filePath) {
  const lower = filePath.toLowerCase();
  return PRIORITY_KEYWORDS.filter((keyword) => lower.includes(keyword)).length;
}

/**
 * Extracts file signatures from the top prioritized files.
 *
 * The MAX_SAMPLED_FILES highest-scoring files (ties keep input order) are
 * read; unreadable ones are skipped, so fewer signatures may be returned.
 * Only the first MAX_LINES_PER_FILE lines of each file are inspected.
 *
 * @param {string} targetPath - Project root; not used here, kept for
 *   signature compatibility.
 * @param {Array<{path: string, relativePath: string}>} files - Candidate files.
 * @returns {Array<{file: string, exports: string[], imports: string[],
 *   lines: number, preview: string}>} One signature per sampled file.
 */
export function extractFileSignatures(targetPath, files) {
  // Score each file once instead of on every comparator call.
  const sampled = files
    .map((file) => ({ file, score: priorityScore(file.relativePath) }))
    .sort((a, b) => b.score - a.score) // highest first; sort is stable
    .slice(0, MAX_SAMPLED_FILES);

  const signatures = [];

  for (const { file } of sampled) {
    const content = readTextFile(file.path);
    if (content === null) {
      continue;
    }

    const allLines = content.split('\n');
    const truncated = allLines.slice(0, MAX_LINES_PER_FILE).join('\n');

    // Extract exports (JS/TS)
    let exports = Array.from(
      truncated.matchAll(
        /export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/g,
      ),
      (m) => m[1],
    );

    // Fall back to Python-style module-level def/class when no JS/TS exports
    // were found.
    if (exports.length === 0) {
      exports = Array.from(truncated.matchAll(/^(?:def|class)\s+(\w+)/gm), (m) => m[1]);
    }

    // Extract imports (first 20): group 1 is a JS/TS module specifier,
    // group 2 a Python `from X import` module name.
    const imports = Array.from(
      truncated.matchAll(/(?:import\s+.*?from\s+['"]([^'"]+)['"]|from\s+([\w.]+)\s+import)/g),
      (m) => m[1] || m[2],
    ).slice(0, 20);

    signatures.push({
      file: file.relativePath,
      exports: exports.slice(0, 20),
      imports,
      lines: allLines.length,
      preview: truncated.slice(0, 500),
    });
  }

  return signatures;
}

// ── Metadata extraction ───────────────────────────────────────────────────

/**
 * Returns the own keys of a plain object, or [] for anything else.
 *
 * Guards against malformed manifests where e.g. "scripts" is a string or an
 * array, for which Object.keys would yield meaningless index keys.
 *
 * @param {unknown} value
 * @returns {string[]}
 */
function keysOf(value) {
  const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
  return isPlainObject ? Object.keys(value) : [];
}

/**
 * Summarizes package.json content.
 *
 * @param {string} content - Raw package.json text.
 * @returns {{name: *, description: *, scripts: string[], dependencies: string[],
 *   devDependencies: string[]}|string} The summary, or the first
 *   PACKAGE_JSON_FALLBACK_CHARS characters of the raw text when the content
 *   cannot be parsed/summarized.
 */
function summarizePackageJson(content) {
  try {
    const pkg = JSON.parse(content);
    return {
      name: pkg.name ?? null,
      description: pkg.description ?? null,
      scripts: keysOf(pkg.scripts),
      dependencies: keysOf(pkg.dependencies),
      devDependencies: keysOf(pkg.devDependencies),
    };
  } catch {
    return content.slice(0, PACKAGE_JSON_FALLBACK_CHARS);
  }
}

/**
 * Tells whether a metadata file is README-style text (gets the larger budget).
 *
 * @param {string} filename
 * @returns {boolean}
 */
function isReadmeLike(filename) {
  return filename === 'README' || ['.md', '.rst', '.txt'].some((ext) => filename.endsWith(ext));
}

/**
 * Reads project metadata from manifest files.
 *
 * Looks for each name in METADATA_FILES directly under `targetPath`; files
 * that cannot be read are omitted. package.json is summarized structurally,
 * README-style files keep their first README_PREVIEW_CHARS characters and
 * every other file its first MANIFEST_PREVIEW_CHARS characters.
 *
 * @param {string} targetPath - Directory containing the manifest files.
 * @returns {Object<string, (string|Object)>} Metadata keyed by file name.
 */
export function extractMetadata(targetPath) {
  const metadata = {};

  for (const filename of METADATA_FILES) {
    const content = readTextFile(path.join(targetPath, filename));
    if (content === null) {
      continue;
    }

    if (filename === 'package.json') {
      metadata[filename] = summarizePackageJson(content);
    } else if (isReadmeLike(filename)) {
      metadata[filename] = content.slice(0, README_PREVIEW_CHARS);
    } else {
      metadata[filename] = content.slice(0, MANIFEST_PREVIEW_CHARS);
    }
  }

  return metadata;
}

// ── Truncation ────────────────────────────────────────────────────────────

/**
 * Byte size of the context when serialized as 2-space-indented UTF-8 JSON.
 *
 * @param {Object} context
 * @returns {number}
 */
function serializedByteLength(context) {
  return Buffer.byteLength(JSON.stringify(context, null, 2), 'utf-8');
}

/**
 * Progressively trims the domain context to fit within maxBytes.
 *
 * The context is trimmed IN PLACE (its arrays are replaced and the `preview`
 * / `snippet` strings of its items are shortened) and the same object is
 * returned. Trimming stops at the first step after which the serialized
 * context fits:
 *   1. fileTree → first 200 entries
 *   2. signature previews → 200 chars
 *   3. entry point snippets → 100 chars
 *   4. fileSignatures → 20 entries, entryPoints → 100 entries
 *   5. repeatedly halve fileTree / fileSignatures / entryPoints
 *
 * Other fields (e.g. metadata) are never trimmed, so if they alone exceed the
 * budget the result can still be larger than maxBytes once the three arrays
 * are empty.
 *
 * @param {{fileTree: string[], fileSignatures: Array<{preview: string}>,
 *   entryPoints: Array<{snippet: string}>}} context - Context to trim.
 * @param {number} [maxBytes=MAX_OUTPUT_BYTES] - Size budget in bytes.
 * @returns {Object} The same `context` object, trimmed as needed.
 */
export function truncateToFit(context, maxBytes = MAX_OUTPUT_BYTES) {
  const fits = () => serializedByteLength(context) <= maxBytes;

  if (fits()) {
    return context;
  }

  // 1. Trim file tree to first 200 entries
  context.fileTree = context.fileTree.slice(0, 200);
  if (fits()) {
    return context;
  }

  // 2. Trim previews in signatures to 200 chars
  for (const sig of context.fileSignatures) {
    sig.preview = sig.preview.slice(0, 200);
  }
  if (fits()) {
    return context;
  }

  // 3. Trim snippets in entry points to 100 chars
  for (const ep of context.entryPoints) {
    ep.snippet = ep.snippet.slice(0, 100);
  }
  if (fits()) {
    return context;
  }

  // 4. Reduce signatures to 20 and entry points to 100
  context.fileSignatures = context.fileSignatures.slice(0, 20);
  context.entryPoints = context.entryPoints.slice(0, 100);

  // 5. Still too large: keep halving the lists until the budget is met or
  //    there is nothing left to drop. Each pass strictly shrinks the total,
  //    so the loop terminates.
  while (!fits()) {
    const remaining =
      context.fileTree.length + context.fileSignatures.length + context.entryPoints.length;
    if (remaining === 0) {
      break;
    }
    context.fileTree = context.fileTree.slice(0, Math.floor(context.fileTree.length / 2));
    context.fileSignatures = context.fileSignatures.slice(
      0,
      Math.floor(context.fileSignatures.length / 2),
    );
    context.entryPoints = context.entryPoints.slice(
      0,
      Math.floor(context.entryPoints.length / 2),
    );
  }

  return context;
}

// ── Main function ─────────────────────────────────────────────────────────

/**
 * Generates the domain context for a project.
 *
 * Combines the file tree, detected entry points, sampled file signatures and
 * manifest metadata, then trims the result with truncateToFit() using the
 * default MAX_OUTPUT_BYTES budget.
 *
 * @param {string} targetPath - Absolute path to the project root
 * @param {Array<{path: string, relativePath: string}>} files - Already-scanned
 *   file entries from the analyzer
 * @returns {Object} The generated DomainContext
 */
export function generateDomainContext(targetPath, files) {
  const context = {
    projectRoot: targetPath,
    fileCount: files.length,
    fileTree: files.map((f) => f.relativePath),
    entryPoints: detectEntryPoints(targetPath, files),
    fileSignatures: extractFileSignatures(targetPath, files),
    metadata: extractMetadata(targetPath),
  };

  return truncateToFit(context);
}