import { readFileSync } from 'fs';

// Keywords that look like `name(` at the start of a line but are never methods.
const CONTROL_KEYWORDS = new Set(['if', 'for', 'while', 'switch', 'catch']);

/**
 * Append `value` to `list` unless it is already present (first-seen order is kept).
 */
function addUnique(list, value) {
  if (!list.includes(value)) list.push(value);
}

/**
 * Net brace balance of a single line: +1 per `{`, -1 per `}`.
 * Braces inside strings and comments are counted too (no tokenizing is done).
 */
function braceDelta(line) {
  let delta = 0;
  for (const ch of line) {
    if (ch === '{') delta++;
    else if (ch === '}') delta--;
  }
  return delta;
}

/**
 * Split `text` on commas that are not nested inside (), [] or {}.
 *
 * @param {string} text - Raw parameter list.
 * @param {boolean} [angleBrackets=false] - Also treat `<...>` as nesting (generics).
 *   The `>` of `=>` and `->` is never treated as a closer, and depth is clamped at
 *   zero so a stray closer cannot hide the commas that follow it.
 * @returns {string[]} Untrimmed pieces; a trailing blank piece is dropped.
 */
function splitTopLevelCommas(text, angleBrackets = false) {
  const pieces = [];
  let depth = 0;
  let current = '';
  let previous = '';
  for (const ch of text) {
    const opens = ch === '(' || ch === '[' || ch === '{' || (angleBrackets && ch === '<');
    const closes =
      ch === ')' ||
      ch === ']' ||
      ch === '}' ||
      (angleBrackets && ch === '>' && previous !== '=' && previous !== '-');
    if (ch === ',' && depth === 0) {
      pieces.push(current);
      current = '';
    } else {
      if (opens) depth++;
      else if (closes) depth = Math.max(0, depth - 1);
      current += ch;
    }
    previous = ch;
  }
  if (current.trim()) pieces.push(current);
  return pieces;
}

/**
 * Turn a comma-separated specifier list ("a, b as c") into the names that are
 * visible after aliasing (["a", "c"]).
 *
 * @param {string} listText - Text between the braces/parens of an import or export.
 * @param {boolean} [stripTypeModifier=false] - Drop a leading TypeScript `type `
 *   modifier from each specifier (`type Foo` -> `Foo`).
 */
function aliasedNames(listText, stripTypeModifier = false) {
  return listText
    .split(',')
    .map((spec) => {
      const visible = spec.trim().split(/\s+as\s+/).pop().trim();
      return stripTypeModifier ? visible.replace(/^type\s+/, '') : visible;
    })
    .filter(Boolean);
}

/**
 * Extract parameter names from a JS/TS params string, stripping type
 * annotations and default values.
 * e.g. "a: string, b: number = 5" -> ["a", "b"]
 *
 * Destructured parameters contribute the property names they list.
 */
function extractParamNames(paramsStr) {
  if (!paramsStr.trim()) return [];

  // Names listed inside a destructuring pattern whose brackets were already removed.
  const patternNames = (inner) =>
    inner
      .split(',')
      .map((s) => s.split(':')[0].trim())
      .filter(Boolean);

  return splitTopLevelCommas(paramsStr, true)
    .flatMap((param) => {
      const trimmed = param.trim();
      // Destructured object param: { a, b }: Type
      if (trimmed.startsWith('{')) {
        return patternNames(trimmed.replace(/\}.*$/, '}').replace(/[{}]/g, ''));
      }
      // Destructured array param: [a, b]: Type
      if (trimmed.startsWith('[')) {
        return patternNames(trimmed.replace(/\].*$/, ']').replace(/[[\]]/g, ''));
      }
      // Plain or rest param: drop `...`, then cut at the first `?`, `:` or `=`.
      const name = trimmed.replace(/^\.\.\./, '').split(/[?:=]/)[0].trim();
      return name ? [name] : [];
    })
    .filter(Boolean);
}

/**
 * Find the end line of a block starting at a given line by counting braces.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLineIndex - 0-based index of the line where scanning starts.
 * @returns {number} 1-indexed line of the brace that closes the first `{` found,
 *   or `lines.length` when no matching brace exists.
 */
function findBlockEnd(lines, startLineIndex) {
  let depth = 0;
  let sawOpen = false;
  for (let i = startLineIndex; i < lines.length; i++) {
    for (const ch of lines[i]) {
      if (ch === '{') {
        depth++;
        sawOpen = true;
      } else if (ch === '}') {
        depth--;
        if (sawOpen && depth === 0) return i + 1;
      }
    }
  }
  return lines.length;
}

/**
 * Parse TypeScript/JavaScript content and extract structural information.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseTypeScript(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  // --- Functions ---
  // `function name(`, `function* name(` and `function *name(` are all accepted.
  const funcDeclRe =
    /^(\s*)(export\s+default\s+|export\s+)?(?:async\s+)?function(?:\s*\*\s*|\s+)(\w+)\s*\(([^)]*)\)/;
  const arrowRe =
    /^(\s*)(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?::[^=]*?)?\s*=>/;
  const singleParamArrowRe =
    /^(\s*)(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(\w+)\s*=>/;

  const addFunction = (name, lineIndex, params, isExported, hasBlock) => {
    functions.push({
      name,
      startLine: lineIndex + 1,
      endLine: hasBlock ? findBlockEnd(lines, lineIndex) : lineIndex + 1,
      params,
      isExported,
    });
    if (isExported) addUnique(exports, name);
  };

  lines.forEach((line, i) => {
    const decl = line.match(funcDeclRe);
    if (decl) {
      addFunction(decl[3], i, extractParamNames(decl[4]), Boolean(decl[2]), true);
      return;
    }
    const arrow = line.match(arrowRe);
    if (arrow) {
      // Any `{` on the line is treated as the start of the body to measure.
      addFunction(arrow[3], i, extractParamNames(arrow[4]), Boolean(arrow[2]), line.includes('{'));
      return;
    }
    const single = line.match(singleParamArrowRe);
    if (single) {
      addFunction(single[3], i, [single[4]], Boolean(single[2]), line.trim().endsWith('{'));
    }
  });

  // --- Classes ---
  const classRe = /^(\s*)(export\s+default\s+|export\s+)?(?:abstract\s+)?class\s+(\w+)/;
  const methodRe =
    /^\s*(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:abstract\s+)?(?:async\s+)?(?:get\s+|set\s+)?(?:\*\s*)?(\w+)\s*\(/;

  lines.forEach((line, i) => {
    const found = line.match(classRe);
    if (!found) return;
    const isExported = Boolean(found[2]);
    const name = found[3];
    const endLine = findBlockEnd(lines, i);

    // Only lines sitting directly in the class body (brace depth 1) can declare
    // methods; deeper lines are statements such as `init(v);` inside a method.
    const methods = [];
    let depth = 0;
    for (let j = i; j < endLine - 1; j++) {
      if (j > i && depth === 1) {
        const method = lines[j].match(methodRe);
        if (method && !CONTROL_KEYWORDS.has(method[1])) addUnique(methods, method[1]);
      }
      depth += braceDelta(lines[j]);
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) addUnique(exports, name);
  });

  // --- Imports --- (an optional TypeScript `type` keyword is accepted after `import`)
  const namedImportRe = /^\s*import\s+(?:type\s+)?\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/;
  const defaultImportRe = /^\s*import\s+(?:type\s+)?(\w+)\s+from\s+['"]([^'"]+)['"]/;
  const namespaceImportRe = /^\s*import\s+(?:type\s+)?\*\s+as\s+(\w+)\s+from\s+['"]([^'"]+)['"]/;
  const mixedImportRe = /^\s*import\s+(?:type\s+)?(\w+)\s*,\s*\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/;
  const sideEffectImportRe = /^\s*import\s+['"]([^'"]+)['"]/;
  const destructuredRequireRe =
    /^\s*(?:const|let|var)\s+\{([^}]+)\}\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/;
  const requireRe = /^\s*(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/;

  for (const line of lines) {
    let m = line.match(namedImportRe);
    if (m) {
      imports.push({ source: m[2], names: aliasedNames(m[1], true) });
      continue;
    }
    m = line.match(defaultImportRe);
    if (m) {
      imports.push({ source: m[2], names: [m[1]] });
      continue;
    }
    m = line.match(namespaceImportRe);
    if (m) {
      imports.push({ source: m[2], names: [m[1]] });
      continue;
    }
    m = line.match(mixedImportRe);
    if (m) {
      imports.push({ source: m[3], names: [m[1], ...aliasedNames(m[2], true)] });
      continue;
    }
    m = line.match(sideEffectImportRe);
    if (m) {
      imports.push({ source: m[1], names: [] });
      continue;
    }
    m = line.match(destructuredRequireRe);
    if (m) {
      // `{ a: b }` binds the local name `b`, so keep the part after the last colon.
      const names = m[1]
        .split(',')
        .map((n) => n.trim().split(/\s*:\s*/).pop().trim())
        .filter(Boolean);
      imports.push({ source: m[2], names });
      continue;
    }
    m = line.match(requireRe);
    if (m) {
      imports.push({ source: m[2], names: [m[1]] });
    }
  }

  // --- Exports --- (exported functions and classes were recorded above)
  for (const line of lines) {
    // export { a, b as c } [from 'module'] / export type { A }
    const named = line.match(/^\s*export\s+(?:type\s+)?\{([^}]+)\}/);
    if (named) {
      for (const name of aliasedNames(named[1], true)) addUnique(exports, name);
      continue;
    }
    // export default identifier;
    const dflt = line.match(/^\s*export\s+default\s+(\w+)\s*;?\s*$/);
    if (dflt) {
      const name = dflt[1];
      // These words start a declaration rather than naming a value.
      if (name !== 'function' && name !== 'class' && name !== 'abstract') addUnique(exports, name);
      continue;
    }
    // export const/let/var name
    const variable = line.match(/^\s*export\s+(?:const|let|var)\s+(\w+)/);
    if (variable) addUnique(exports, variable[1]);
  }

  return { functions, classes, imports, exports };
}

/**
 * Get the indentation level (number of leading spaces) of a line.
 * Tabs are counted as 4 spaces.
 */
function getIndentation(line) {
  let width = 0;
  for (const ch of line) {
    if (ch === ' ') width += 1;
    else if (ch === '\t') width += 4;
    else break;
  }
  return width;
}

/**
 * Find the end line of an indentation-based block (Python).
 * The block ends before the first non-blank, non-comment line whose indentation
 * is the same as or lower than the header's, or at end of file.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLineIndex - 0-based index of the `def`/`class` line.
 * @returns {number} 1-indexed line of the last non-blank line of the block; the
 *   header's own line number when the block has no non-blank body line.
 */
function findIndentBlockEnd(lines, startLineIndex) {
  const baseIndent = getIndentation(lines[startLineIndex]);

  // Index of the first line that no longer belongs to the block.
  let limit = lines.length;
  for (let i = startLineIndex + 1; i < lines.length; i++) {
    const text = lines[i].trim();
    if (text === '' || text.startsWith('#')) continue;
    if (getIndentation(lines[i]) <= baseIndent) {
      limit = i;
      break;
    }
  }

  // Walk back over trailing blank lines so they are not counted as block content.
  for (let j = limit - 1; j > startLineIndex; j--) {
    if (lines[j].trim() !== '') return j + 1;
  }
  return startLineIndex + 1;
}

/**
 * Extract parameter names from a Python params string, stripping type
 * annotations and defaults.
 * e.g. "self, name: str, age: int = 0, *args, **kwargs"
 *   -> ["self", "name", "age", "args", "kwargs"]
 * The bare `*` and `/` markers are dropped.
 */
function extractPythonParamNames(paramsStr) {
  if (!paramsStr.trim()) return [];
  return splitTopLevelCommas(paramsStr)
    .map((param) =>
      param
        .trim()
        .replace(/^\*\*/, '')
        .replace(/^\*/, '')
        .split(/[=:]/)[0]
        .trim(),
    )
    .filter((name) => name !== '' && name !== '/');
}

/**
 * Parse Python content and extract structural information.
 *
 * A name counts as exported when it does not start with `_` (functions must also
 * be at indentation 0). When `__all__ = [...]` is present, its entries are
 * returned as the exports instead.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 */
export function parsePython(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  let allNames = null;
  const allMatch = content.match(/__all__\s*=\s*\[([^\]]*)\]/s);
  if (allMatch) {
    allNames = allMatch[1]
      .split(',')
      .map((s) => s.trim().replace(/^['"]|['"]$/g, ''))
      .filter(Boolean);
  }

  // --- Classes first, so function extraction can skip class bodies ---
  const classRanges = [];
  lines.forEach((line, i) => {
    const found = line.match(/^(\s*)class\s+(\w+)\s*(?:\([^)]*\))?\s*:/);
    if (!found) return;
    const classIndent = getIndentation(line);
    const name = found[2];
    const endLine = findIndentBlockEnd(lines, i);
    const isExported = !name.startsWith('_');

    const methods = [];
    for (let j = i + 1; j < endLine; j++) {
      const method = lines[j].match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(/);
      // A method must be indented deeper than its class header.
      if (method && getIndentation(lines[j]) > classIndent) methods.push(method[2]);
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    classRanges.push({ start: i, end: endLine - 1 }); // 0-indexed, inclusive
    if (isExported) addUnique(exports, name);
  });

  // --- Functions outside class bodies ---
  lines.forEach((line, i) => {
    if (classRanges.some((r) => i > r.start && i <= r.end)) return;
    const found = line.match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->.*?)?\s*:/);
    if (!found) return;
    const name = found[2];
    const isExported = getIndentation(line) === 0 && !name.startsWith('_');
    functions.push({
      name,
      startLine: i + 1,
      endLine: findIndentBlockEnd(lines, i),
      params: extractPythonParamNames(found[3]),
      isExported,
    });
    if (isExported) addUnique(exports, name);
  });

  // --- Imports ---
  // Import statements contain no string literals, so everything from `#` on is a comment.
  const withoutComment = (text) => text.replace(/\s*#.*$/, '');

  for (let i = 0; i < lines.length; i++) {
    const line = withoutComment(lines[i]);

    // from module import (a, b) - possibly spread over several lines
    const parenthesized = line.match(/^\s*from\s+([\w.]+)\s+import\s*\(/);
    if (parenthesized) {
      let namesStr = line.substring(line.indexOf('(') + 1);
      if (namesStr.includes(')')) {
        namesStr = namesStr.substring(0, namesStr.indexOf(')'));
      } else {
        for (let j = i + 1; j < lines.length; j++) {
          const continuation = withoutComment(lines[j]);
          if (continuation.includes(')')) {
            namesStr += continuation.substring(0, continuation.indexOf(')'));
            break;
          }
          namesStr += continuation;
        }
      }
      imports.push({ source: parenthesized[1], names: aliasedNames(namesStr) });
      continue;
    }

    // from module import a, b as c / from module import *
    const fromImport = line.match(/^\s*from\s+([\w.]+)\s+import\s+(.+)$/);
    if (fromImport) {
      const importPart = fromImport[2].trim();
      const names = importPart === '*' ? ['*'] : aliasedNames(importPart);
      imports.push({ source: fromImport[1], names });
      continue;
    }

    // import mod / import mod as alias / import mod1, mod2
    const plainImport = line.match(/^\s*import\s+(.+)$/);
    if (plainImport) {
      for (const mod of plainImport[1].trim().split(',')) {
        const parts = mod.trim().split(/\s+as\s+/);
        const source = parts[0].trim();
        const alias = parts.length > 1 ? parts[parts.length - 1].trim() : source;
        imports.push({ source, names: [alias] });
      }
    }
  }

  return { functions, classes, imports, exports: allNames ?? exports };
}

/**
 * Parse Go content and extract structural information.
 * Structs and interfaces are reported as classes; identifiers starting with an
 * upper-case letter are reported as exported.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 */
export function parseGo(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  // The bound name is the alias when one is given, else the last path segment.
  const addImport = (alias, source) => {
    imports.push({ source, names: [alias || source.split('/').pop() || source] });
  };

  // --- Imports ---
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // import "pkg" / import alias "pkg"
    const single = line.match(/^\s*import\s+(?:([\w.]+)\s+)?"([^"]+)"/);
    if (single) {
      addImport(single[1], single[2]);
      continue;
    }

    // import ( ... )
    if (/^\s*import\s*\(/.test(line)) {
      for (let j = i + 1; j < lines.length; j++) {
        const entry = lines[j].trim();
        // The closing paren may be followed by a comment.
        if (entry.startsWith(')')) break;
        const pkg = entry.match(/^(?:([\w.]+)\s+)?"([^"]+)"/);
        if (pkg) addImport(pkg[1], pkg[2]);
      }
    }
  }

  // --- Types (struct/interface as classes) ---
  lines.forEach((line, i) => {
    const found = line.match(/^\s*type\s+(\w+)\s+(?:struct|interface)\s*\{/);
    if (!found) return;
    const name = found[1];
    const endLine = findBlockEnd(lines, i);
    const isExported = /^[A-Z]/.test(name);

    // Body lines of the form `Name(` are method signatures.
    const methods = [];
    for (const bodyLine of lines.slice(i + 1, endLine - 1)) {
      const method = bodyLine.match(/^\s+(\w+)\s*\(/);
      if (method) methods.push(method[1]);
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) addUnique(exports, name);
  });

  // --- Functions ---
  // Optional receiver `(r *T)` / `(r *T[K])`, name, optional type params `[T any]`, params.
  const funcRe = /^\s*func\s+(?:\([^)]*\)\s*)?(\w+)\s*(?:\[[^\]]*\])?\s*\(([^)]*)\)/;
  lines.forEach((line, i) => {
    const found = line.match(funcRe);
    if (!found) return;
    const name = found[1];
    const isExported = /^[A-Z]/.test(name);
    // Go params read `name type`, so the first word of each entry is the name.
    const params = found[2]
      .split(',')
      .map((p) => p.trim().split(/\s+/)[0])
      .filter(Boolean);
    functions.push({
      name,
      startLine: i + 1,
      endLine: line.includes('{') ? findBlockEnd(lines, i) : i + 1,
      params,
      isExported,
    });
    if (isExported) addUnique(exports, name);
  });

  return { functions, classes, imports, exports };
}

/**
 * Extract parameter names from a Rust params string, skipping the receiver
 * (`self`, `&self`, `&mut self`, `mut self`) and ignoring commas inside generics.
 */
function extractRustParamNames(paramsStr) {
  return splitTopLevelCommas(paramsStr, true)
    .map((p) => p.trim())
    .filter((p) => p && !p.startsWith('&') && p !== 'self' && !p.startsWith('mut self'))
    .map((p) => p.split(':')[0].replace(/^mut\s+/, '').trim())
    .filter(Boolean);
}

/**
 * Best-effort name of the type an `impl` header line implements for.
 * `impl<T> fmt::Display for Store<T> {` -> "Store". Returns null when no type
 * name can be read from the line.
 */
function rustImplTarget(line) {
  let rest = line.replace(/^\s*(?:unsafe\s+)?impl/, '').trimStart();

  // Skip the impl-level generic parameter list, if any.
  if (rest.startsWith('<')) {
    let depth = 0;
    for (let k = 0; k < rest.length; k++) {
      if (rest[k] === '<') {
        depth++;
      } else if (rest[k] === '>' && rest[k - 1] !== '-') {
        depth--;
        if (depth === 0) {
          rest = rest.slice(k + 1);
          break;
        }
      }
    }
  }

  rest = rest.split('{')[0].split(/\bwhere\b/)[0];
  const forAt = rest.lastIndexOf(' for ');
  const typeText = (forAt >= 0 ? rest.slice(forAt + 5) : rest)
    .trim()
    .replace(/^&\s*/, '')
    .replace(/^'\w+\s+/, '')
    .replace(/^(?:mut|dyn)\s+/, '');
  const path = typeText.match(/^(?:\w+::)*(\w+)/);
  return path ? path[1] : null;
}

/**
 * Parse Rust content and extract structural information.
 * Structs and enums are reported as classes; functions found inside `impl`
 * blocks are attached to the matching class and also listed as functions.
 * Only items marked with a bare `pub` are reported as exported.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 */
export function parseRust(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  const isPublic = (visibility) => visibility !== undefined && visibility.trim() === 'pub';
  const implHeaderRe = /^\s*(?:unsafe\s+)?impl(?=\s|<)/;
  const fnRe =
    /^\s*(pub(?:\([^)]*\))?\s+)?(?:(?:const|async|unsafe)\s+)*fn\s+(\w+)\s*(?:<.*?>)?\s*\(([^)]*)\)/;

  // Record the fn declared on line `index` (if any) and return its name, else null.
  const recordFn = (index) => {
    const line = lines[index];
    const found = line.match(fnRe);
    if (!found) return null;
    const name = found[2];
    const isExported = isPublic(found[1]);
    functions.push({
      name,
      startLine: index + 1,
      endLine: line.includes('{') ? findBlockEnd(lines, index) : index + 1,
      params: extractRustParamNames(found[3]),
      isExported,
    });
    if (isExported) addUnique(exports, name);
    return name;
  };

  // --- Imports (use statements) ---
  for (const line of lines) {
    const use = line.match(/^\s*(?:pub(?:\([^)]*\))?\s+)?use\s+(.+);/);
    if (!use) continue;
    const usePath = use[1].trim();
    // use std::collections::{HashMap, HashSet};
    const group = usePath.match(/^(.+)::\{([^}]+)\}/);
    if (group) {
      imports.push({ source: group[1], names: aliasedNames(group[2]) });
    } else {
      // use std::collections::HashMap; / use a::B as C;
      const lastSegment = usePath.split('::').pop();
      imports.push({ source: usePath, names: [lastSegment.split(/\s+as\s+/).pop().trim()] });
    }
  }

  // --- Structs/enums as classes ---
  lines.forEach((line, i) => {
    const found = line.match(/^\s*(pub(?:\([^)]*\))?\s+)?(?:struct|enum)\s+(\w+)/);
    if (!found) return;
    const name = found[2];
    const isExported = isPublic(found[1]);
    classes.push({
      name,
      startLine: i + 1,
      endLine: line.includes('{') ? findBlockEnd(lines, i) : i + 1,
      methods: [],
      isExported,
    });
    if (isExported) addUnique(exports, name);
  });

  // --- impl blocks and their methods ---
  // The same header test drives both method extraction and the skip ranges used
  // below, so a function inside an impl block is never dropped by both passes.
  const implRanges = [];
  lines.forEach((line, i) => {
    if (!implHeaderRe.test(line)) return;
    const implEnd = findBlockEnd(lines, i);
    implRanges.push({ start: i, end: implEnd - 1 });
    const target = rustImplTarget(line);
    const classEntry = classes.find((c) => c.name === target);
    for (let j = i + 1; j < implEnd - 1; j++) {
      const fnName = recordFn(j);
      if (fnName !== null && classEntry) classEntry.methods.push(fnName);
    }
  });

  // --- Standalone functions ---
  for (let i = 0; i < lines.length; i++) {
    if (implRanges.some((r) => i >= r.start && i <= r.end)) continue;
    recordFn(i);
  }

  return { functions, classes, imports, exports };
}

/**
 * Parse Java content and extract structural information.
 * Classes, interfaces and enums are reported as classes; members declared
 * directly in a type body are reported both as that type's methods and as
 * functions. Only `public` items are reported as exported.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 */
export function parseJava(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  // --- Imports ---
  for (const line of lines) {
    // import package.Class; / import static package.Class.method;
    const found = line.match(/^\s*import\s+(?:static\s+)?([^;]+);/);
    if (found) {
      const source = found[1].trim();
      imports.push({ source, names: [source.split('.').pop()] });
    }
  }

  // --- Classes/interfaces/enums and their members ---
  const classRe =
    /^\s*(?:(public|protected|private)\s+)?(?:abstract\s+)?(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+(\w+)/;
  // Needs only the opening paren, so multi-line signatures still yield a method name.
  const methodNameRe =
    /^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:final\s+)?(?:abstract\s+)?(?:synchronized\s+)?(?:\w+(?:<[^>]*>)?(?:\[\])*)\s+(\w+)\s*\(/;
  // Needs the whole parameter list on one line, so params can be extracted.
  const methodSignatureRe =
    /^\s*(public|protected|private)?\s*(?:static\s+)?(?:final\s+)?(?:abstract\s+)?(?:synchronized\s+)?(?:\w+(?:<[^>]*>)?(?:\[\])*)\s+(\w+)\s*\(([^)]*)\)/;

  lines.forEach((line, i) => {
    const found = line.match(classRe);
    if (!found) return;
    const visibility = found[1];
    const name = found[2];
    const endLine = findBlockEnd(lines, i);
    const isExported = visibility === 'public';

    // `methods` is filled by the loop below; the class is registered first so its
    // name precedes its members in `exports`.
    const methods = [];
    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) addUnique(exports, name);

    const ctorRe = new RegExp(`^\\s*(?:(?:public|protected|private)\\s+)?${name}\\s*\\(`);

    let depth = 0;
    for (let j = i; j < endLine; j++) {
      const bodyLine = lines[j];
      // Members are tested BEFORE this line's braces are counted: depth 1 means
      // the line sits directly in the type body.
      if (depth === 1) {
        const named = bodyLine.match(methodNameRe);
        if (named && !CONTROL_KEYWORDS.has(named[1])) addUnique(methods, named[1]);
        if (ctorRe.test(bodyLine)) addUnique(methods, name);

        const signature = bodyLine.match(methodSignatureRe);
        if (signature && !CONTROL_KEYWORDS.has(signature[2])) {
          const methodName = signature[2];
          const fnIsExported = signature[1] === 'public';
          // Java params read `Type name`; entries with a single word have no name.
          const params = splitTopLevelCommas(signature[3], true)
            .map((p) => {
              const words = p.trim().split(/\s+/);
              return words.length >= 2 ? words[words.length - 1] : '';
            })
            .filter(Boolean);
          functions.push({
            name: methodName,
            startLine: j + 1,
            endLine: bodyLine.includes('{') ? findBlockEnd(lines, j) : j + 1,
            params,
            isExported: fnIsExported,
          });
          if (fnIsExported) addUnique(exports, methodName);
        }
      }
      depth += braceDelta(bodyLine);
    }
  });

  return { functions, classes, imports, exports };
}

/**
 * Parse generic content (PHP, Ruby, or unknown languages) using common patterns.
 * Returns empty lists when nothing is detected. Everything found is reported
 * as exported.
 *
 * @param {string} content - Source text.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 */
export function parseGeneric(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  // Brace-delimited when `{` is on the header line or alone on the next one;
  // otherwise the block is assumed to be closed by a Ruby-style `end`.
  const blockEnd = (index) => {
    const usesBraces =
      lines[index].includes('{') ||
      (index + 1 < lines.length && lines[index + 1]?.trim() === '{');
    return usesBraces ? findBlockEnd(lines, index) : findRubyBlockEnd(lines, index);
  };

  // --- Imports ---
  for (const line of lines) {
    // PHP: use Namespace\Class;
    const phpUse = line.match(/^\s*use\s+([^;]+);/);
    if (phpUse) {
      const source = phpUse[1].trim();
      imports.push({ source, names: [source.split('\\').pop()] });
      continue;
    }
    // PHP: require/include[_once] 'file.php'
    const phpRequire = line.match(/^\s*(?:require|include)(?:_once)?\s+['"]([^'"]+)['"]/);
    if (phpRequire) {
      const source = phpRequire[1];
      const name = source.split('/').pop()?.replace(/\.\w+$/, '') || source;
      imports.push({ source, names: [name] });
      continue;
    }
    // Ruby: require 'gem' / require_relative 'file'
    const rubyRequire = line.match(/^\s*require(?:_relative)?\s+['"]([^'"]+)['"]/);
    if (rubyRequire) {
      const source = rubyRequire[1];
      imports.push({ source, names: [source.split('/').pop() || source] });
    }
  }

  // --- Classes ---
  lines.forEach((line, i) => {
    const found = line.match(/^\s*(?:(?:public|abstract|final)\s+)*class\s+(\w+)/);
    if (!found) return;
    const name = found[1];
    const endLine = blockEnd(i);

    const methods = [];
    for (let j = i + 1; j < endLine; j++) {
      const method = lines[j].match(
        /^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def)\s+(\w+)/,
      );
      if (method) methods.push(method[1]);
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported: true });
    addUnique(exports, name);
  });

  // --- Functions outside class bodies ---
  const funcRe =
    /^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def|fn)\s+(\w+)\s*\(([^)]*)\)/;
  // Ruby allows `def name` with no parameter list at all.
  const bareDefRe = /^\s*def\s+(?:self\.)?(\w+)[?!=]?\s*(?:#.*)?$/;

  lines.forEach((line, i) => {
    const lineNumber = i + 1;
    if (classes.some((c) => lineNumber > c.startLine && lineNumber < c.endLine)) return;

    let name;
    let paramsStr = '';
    const withParams = line.match(funcRe);
    if (withParams) {
      name = withParams[1];
      paramsStr = withParams[2];
    } else {
      const bare = line.match(bareDefRe);
      if (!bare) return;
      name = bare[1];
    }

    // Drop PHP's `$` sigil, then cut at the first `=` or `:`.
    const params = paramsStr
      .split(',')
      .map((p) => p.trim().replace(/^\$/, '').split(/[=:]/)[0].trim())
      .filter(Boolean);

    functions.push({ name, startLine: lineNumber, endLine: blockEnd(i), params, isExported: true });
    addUnique(exports, name);
  });

  return { functions, classes, imports, exports };
}

/**
 * Find the end of a Ruby-style block: the first later line that is exactly
 * `end` at the same or lower indentation than the header.
 *
 * @returns {number} 1-indexed line of that `end`, or `lines.length` when none is found.
 */
function findRubyBlockEnd(lines, startLineIndex) {
  const baseIndent = getIndentation(lines[startLineIndex]);
  for (let i = startLineIndex + 1; i < lines.length; i++) {
    const text = lines[i].trim();
    if (text === 'end' && getIndentation(lines[i]) <= baseIndent) return i + 1;
  }
  return lines.length;
}

/**
 * Parses a source file and extracts structural information using
 * regex-based extraction (no tree-sitter dependency).
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @param {string} language - Detected language of the file (e.g. 'typescript', 'python').
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Extracted functions, classes, imports and exports.
 */
export function parseFile(filePath, language) {
  const content = readFileSync(filePath, 'utf-8');
  switch (language) {
    case 'typescript':
    case 'javascript':
      return parseTypeScript(content);
    case 'python':
      return parsePython(content);
    case 'go':
      return parseGo(content);
    case 'rust':
      return parseRust(content);
    case 'java':
      return parseJava(content);
    default:
      // Ruby, PHP, and anything else use the generic parser.
      return parseGeneric(content);
  }
}