Spaces:

mr4
/

knowledge-graph-preview

Running

App Files Files Community

knowledge-graph-preview / cli /analyzer /domain-context.js

mr4's picture

Upload 136 files

fd8cdf5 verified 3 days ago

history blame contribute delete

11.9 kB

	import * as fs from 'node:fs';
	import * as path from 'node:path';
	// ── Configuration ──────────────────────────────────────────────────────────
	/** Upper bound on the number of entry points collected by detectEntryPoints. */
	const MAX_ENTRY_POINTS = 200;
	/** Number of top-ranked files sampled by extractFileSignatures. */
	const MAX_SAMPLED_FILES = 40;
	/** Number of leading lines of each sampled file inspected for exports/imports. */
	const MAX_LINES_PER_FILE = 80;
	/** Default byte budget used by truncateToFit (512 KB). */
	const MAX_OUTPUT_BYTES = 512 * 1024;
	/**
	 * Catalogue of regexes used to spot entry points in source text.
	 *
	 * Each entry carries a coarse `type`, a human-readable `description` and a
	 * `pattern`. Every pattern has the `g` flag, so `exec`/`test` advance
	 * `lastIndex`; reset `pattern.lastIndex = 0` before scanning a new string.
	 *
	 * The alternations use a plain `|` (an escaped `\|` would only match a literal
	 * pipe character) and whitespace around `.` and `(` is optional (`\s*`), so
	 * ordinary calls such as `app.get('/x')` are recognised.
	 */
	const ENTRY_POINT_PATTERNS = [
	    // HTTP routes
	    {
	        type: 'http',
	        description: 'Express/Koa route',
	        pattern: /(?:app|router|server)\s*\.\s*(?:get|post|put|patch|delete|all|use)\s*\(\s*['"]([^'"]*?)['"]/gi,
	    },
	    {
	        type: 'http',
	        description: 'Decorator route (Flask/FastAPI/NestJS)',
	        pattern: /@(?:app\.)?(?:route|get|post|put|patch|delete|api_view|RequestMapping|GetMapping|PostMapping)\s*\(\s*['"]([^'"]*?)['"]/gi,
	    },
	    {
	        type: 'http',
	        description: 'Next.js/Remix route handler',
	        pattern: /export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b/g,
	    },
	    // CLI
	    {
	        type: 'cli',
	        description: 'CLI command',
	        pattern: /\.command\s*\(\s*['"]([\w\-:]+)['"]/g,
	    },
	    {
	        type: 'cli',
	        description: 'argparse subparser',
	        pattern: /add_parser\s*\(\s*['"]([\w\-]+)['"]/g,
	    },
	    // Event handlers
	    {
	        type: 'event',
	        description: 'Event listener',
	        pattern: /\.on\s*\(\s*['"]([\w\-:.]+)['"]/g,
	    },
	    {
	        type: 'event',
	        description: 'Event subscriber decorator',
	        pattern: /@(?:EventHandler|Subscribe|Listener|on_event)\s*\(\s*['"]([\w\-:.]+)['"]/g,
	    },
	    // Cron / scheduled
	    {
	        type: 'cron',
	        description: 'Cron schedule',
	        pattern: /@?(?:Cron|Schedule|Scheduled|crontab)\s*\(\s*['"]([^'"]+)['"]/gi,
	    },
	    // GraphQL
	    {
	        type: 'http',
	        description: 'GraphQL resolver',
	        pattern: /@(?:Query|Mutation|Subscription|Resolver)\s*\(/g,
	    },
	    // Exported handlers (generic)
	    {
	        type: 'manual',
	        description: 'Exported handler',
	        pattern: /export\s+(?:async\s+)?function\s+(handle\w+|process\w+|on\w+)\b/g,
	    },
	];
	// Matches paths that look like test files: `*.test.*`, `*.spec.*`, anything
	// under `__tests__`, and Python's `*_test.py` / `test_*.py`.
	// The alternatives are separated by a plain `|`; an escaped `\|` would match a
	// literal pipe and never flag a real path.
	const TEST_FILE_PATTERN = /(?:\.test\.|\.spec\.|__tests__|_test\.py|test_\w+\.py)/;
	// ── Metadata files ────────────────────────────────────────────────────────
	/**
	 * File names that extractMetadata looks for directly under the project root.
	 * The order here is the order in which keys are added to the metadata object.
	 */
	const METADATA_FILES = [
	    // Package / build manifests
	    'package.json', 'Cargo.toml', 'go.mod',
	    'pyproject.toml', 'setup.py', 'setup.cfg',
	    'pom.xml', 'build.gradle',
	    'Gemfile', 'composer.json', 'mix.exs',
	    // Build and container tooling
	    'Makefile', 'docker-compose.yml', 'docker-compose.yaml',
	    // Readme variants
	    'README.md', 'README.rst', 'README.txt', 'README',
	];
	// ── Priority keywords for file signature selection ────────────────────────
	/**
	 * Substrings that priorityScore searches for in a lower-cased file path.
	 * Every keyword found in the path adds one point to that file's score.
	 */
	const PRIORITY_KEYWORDS = [
	    'controller', 'service', 'handler', 'router', 'route', 'api',
	    'model', 'entity', 'repository',
	    'usecase', 'use_case', 'command', 'query',
	    'event', 'subscriber', 'listener',
	    'middleware', 'guard', 'interceptor', 'resolver',
	    'workflow', 'flow', 'process', 'pipeline', 'job', 'task',
	];
	// ── Entry point detection ──────────────────────────────────────────────────
	/**
	 * Scans the given files for text matching any of ENTRY_POINT_PATTERNS.
	 *
	 * Files whose relative path matches TEST_FILE_PATTERN are skipped, as are
	 * files that cannot be read. Collection stops as soon as MAX_ENTRY_POINTS
	 * results have been gathered.
	 *
	 * @param targetPath - Not read by this function.
	 * @param files - Entries exposing `path` (read from disk) and `relativePath` (reported).
	 * @returns Array of `{ file, line, type, description, match, snippet }` records.
	 */
	export function detectEntryPoints(targetPath, files) {
	    const found = [];
	    const isFull = () => found.length >= MAX_ENTRY_POINTS;
	    for (const entry of files) {
	        if (isFull()) {
	            break;
	        }
	        if (TEST_FILE_PATTERN.test(entry.relativePath)) {
	            continue;
	        }
	        let source;
	        try {
	            source = fs.readFileSync(entry.path, 'utf-8');
	        }
	        catch {
	            // Unreadable file: skip it and keep scanning the rest.
	            continue;
	        }
	        const sourceLines = source.split('\n');
	        for (const { type, description, pattern } of ENTRY_POINT_PATTERNS) {
	            if (isFull()) {
	                break;
	            }
	            // The patterns are global, so start each scan from the beginning.
	            pattern.lastIndex = 0;
	            for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
	                if (isFull()) {
	                    break;
	                }
	                // 1-based line number of the match start.
	                const lineNumber = source.slice(0, hit.index).split('\n').length;
	                // Snippet: up to five lines beginning at the matched line.
	                const firstIndex = lineNumber - 1;
	                const excerpt = sourceLines.slice(firstIndex, firstIndex + 5).join('\n');
	                found.push({
	                    file: entry.relativePath,
	                    line: lineNumber,
	                    type,
	                    description,
	                    match: hit[0].slice(0, 120),
	                    snippet: excerpt.slice(0, 300),
	                });
	            }
	        }
	    }
	    return found;
	}
	// ── File signature extraction ─────────────────────────────────────────────
	/**
	 * Scores a file path by counting how many PRIORITY_KEYWORDS occur in it.
	 * The comparison is case-insensitive and each keyword counts at most once.
	 */
	function priorityScore(filePath) {
	    const haystack = filePath.toLowerCase();
	    return PRIORITY_KEYWORDS.filter((keyword) => haystack.includes(keyword)).length;
	}
	/**
	 * Builds a lightweight signature for the highest-priority files.
	 *
	 * Files are ranked by priorityScore(relativePath), highest first, and the top
	 * MAX_SAMPLED_FILES are read. Unreadable files are skipped and not replaced.
	 * Only the first MAX_LINES_PER_FILE lines of each file are inspected.
	 *
	 * @param targetPath - Not read by this function.
	 * @param files - Entries exposing `path` and `relativePath`.
	 * @returns Array of `{ file, exports, imports, lines, preview }` records.
	 */
	export function extractFileSignatures(targetPath, files) {
	    const signatures = [];
	    // Score each file once instead of on every comparator call.
	    const ranked = [...files]
	        .map((file) => ({ file, score: priorityScore(file.relativePath) }))
	        .sort((a, b) => b.score - a.score);
	    for (const { file } of ranked.slice(0, MAX_SAMPLED_FILES)) {
	        let content;
	        try {
	            content = fs.readFileSync(file.path, 'utf-8');
	        }
	        catch {
	            // Unreadable file: skip it.
	            continue;
	        }
	        const allLines = content.split('\n');
	        const truncated = allLines.slice(0, MAX_LINES_PER_FILE).join('\n');
	        // JS/TS exports: `export [default] [async] function|class|const|... Name`.
	        let exports = [
	            ...truncated.matchAll(/export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/g),
	        ].map((m) => m[1]);
	        // Fallback for Python: `def`/`class` at the start of a line.
	        if (exports.length === 0) {
	            exports = [...truncated.matchAll(/^(?:def|class)\s+(\w+)/gm)].map((m) => m[1]);
	        }
	        // Imports: ES `import ... from 'x'` (group 1) or Python `from x import` (group 2).
	        const imports = [
	            ...truncated.matchAll(/(?:import\s+.*?from\s+['"]([^'"]+)['"]|from\s+([\w.]+)\s+import)/g),
	        ].map((m) => m[1] || m[2]).slice(0, 20);
	        signatures.push({
	            file: file.relativePath,
	            exports: exports.slice(0, 20),
	            imports,
	            lines: allLines.length,
	            preview: truncated.slice(0, 500),
	        });
	    }
	    return signatures;
	}
	// ── Metadata extraction ───────────────────────────────────────────────────
	/**
	 * Reads project metadata from well-known manifest files in the project root.
	 *
	 * For every name in METADATA_FILES that can be read from `targetPath`:
	 *  - `package.json` is parsed and reduced to name, description and the key
	 *    lists of scripts/dependencies/devDependencies; if parsing or reading
	 *    those fields throws, the first 500 characters of raw text are stored.
	 *  - README-style files (`.md`, `.rst`, `.txt`, or exactly `README`) keep
	 *    their first 2000 characters.
	 *  - every other file keeps its first 1000 characters.
	 * Files that cannot be read are omitted.
	 *
	 * @param targetPath - Directory in which the metadata files are looked up.
	 * @returns Object keyed by file name.
	 */
	export function extractMetadata(targetPath) {
	    const metadata = {};
	    for (const filename of METADATA_FILES) {
	        const filepath = path.join(targetPath, filename);
	        let content;
	        try {
	            content = fs.readFileSync(filepath, 'utf-8');
	        }
	        catch {
	            // Missing or unreadable file: nothing to record.
	            continue;
	        }
	        if (filename === 'package.json') {
	            try {
	                const pkg = JSON.parse(content);
	                metadata[filename] = {
	                    name: pkg.name ?? null,
	                    description: pkg.description ?? null,
	                    scripts: Object.keys(pkg.scripts || {}),
	                    dependencies: Object.keys(pkg.dependencies || {}),
	                    devDependencies: Object.keys(pkg.devDependencies || {}),
	                };
	            }
	            catch {
	                // Malformed manifest: keep a short raw excerpt instead.
	                metadata[filename] = content.slice(0, 500);
	            }
	            continue;
	        }
	        const isReadmeLike = filename === 'README'
	            || ['.md', '.rst', '.txt'].some((ext) => filename.endsWith(ext));
	        metadata[filename] = content.slice(0, isReadmeLike ? 2000 : 1000);
	    }
	    return metadata;
	}
	// ── Truncation ────────────────────────────────────────────────────────────
	/**
	 * Progressively trims the domain context until its pretty-printed JSON
	 * (2-space indent, UTF-8) is at most `maxBytes`.
	 *
	 * Steps are applied in order and trimming stops as soon as the context fits:
	 *  1. keep the first 200 `fileTree` entries;
	 *  2. cut every signature `preview` to 200 characters;
	 *  3. cut every entry-point `snippet` to 100 characters;
	 *  4. keep the first 20 signatures and the first 100 entry points.
	 *
	 * The context is modified in place and the same object is returned. The size
	 * is not re-checked after step 4, so the result may still exceed `maxBytes`.
	 * Missing or non-array collections, and entries without a string
	 * `preview`/`snippet`, are left untouched rather than causing a TypeError.
	 *
	 * @param context - Domain context object to trim.
	 * @param maxBytes - Byte budget; defaults to MAX_OUTPUT_BYTES.
	 * @returns The (possibly trimmed) `context` object.
	 */
	export function truncateToFit(context, maxBytes = MAX_OUTPUT_BYTES) {
	    const fits = () => Buffer.byteLength(JSON.stringify(context, null, 2), 'utf-8') <= maxBytes;
	    const listOf = (key) => (Array.isArray(context[key]) ? context[key] : []);
	    const steps = [
	        // 1. Trim file tree to first 200 entries
	        () => {
	            if (Array.isArray(context.fileTree)) {
	                context.fileTree = context.fileTree.slice(0, 200);
	            }
	        },
	        // 2. Trim previews in signatures to 200 chars
	        () => {
	            for (const sig of listOf('fileSignatures')) {
	                if (typeof sig?.preview === 'string') {
	                    sig.preview = sig.preview.slice(0, 200);
	                }
	            }
	        },
	        // 3. Trim snippets in entry points to 100 chars
	        () => {
	            for (const ep of listOf('entryPoints')) {
	                if (typeof ep?.snippet === 'string') {
	                    ep.snippet = ep.snippet.slice(0, 100);
	                }
	            }
	        },
	        // 4. Reduce signatures to 20 and entry points to 100
	        () => {
	            if (Array.isArray(context.fileSignatures)) {
	                context.fileSignatures = context.fileSignatures.slice(0, 20);
	            }
	            if (Array.isArray(context.entryPoints)) {
	                context.entryPoints = context.entryPoints.slice(0, 100);
	            }
	        },
	    ];
	    // Check the size before each step; the last step is applied unconditionally.
	    for (const applyStep of steps) {
	        if (fits()) {
	            return context;
	        }
	        applyStep();
	    }
	    return context;
	}
	// ── Main function ─────────────────────────────────────────────────────────
	/**
	 * Assembles the domain context for a project and trims it to the output budget.
	 *
	 * Collects the relative paths of all files, the detected entry points, the
	 * file signatures and the project metadata, then passes the combined object
	 * through truncateToFit with its default byte limit.
	 *
	 * @param targetPath - Absolute path to the project root
	 * @param files - Already-scanned file entries from the analyzer
	 * @returns The generated DomainContext
	 */
	export function generateDomainContext(targetPath, files) {
	    const fileTree = files.map(({ relativePath }) => relativePath);
	    return truncateToFit({
	        projectRoot: targetPath,
	        fileCount: files.length,
	        fileTree,
	        entryPoints: detectEntryPoints(targetPath, files),
	        fileSignatures: extractFileSignatures(targetPath, files),
	        metadata: extractMetadata(targetPath),
	    });
	}