Spaces:
Running
Running
| import { readFileSync } from 'fs'; | |
/**
 * Extract parameter names from a params string, stripping type annotations,
 * optional markers (`?`), rest markers (`...`) and default values.
 * e.g. "a: string, b: number = 5" -> ["a", "b"]
 *
 * Commas nested inside `<>`, `()`, `{}` or `[]` do not separate parameters.
 * Destructured parameters contribute each of their entries, e.g.
 * "{ a, b = 1 }: Opts" -> ["a", "b"].
 *
 * @param {string} paramsStr - Text found between the parentheses of a parameter list.
 * @returns {string[]} Parameter names in declaration order.
 */
function extractParamNames(paramsStr) {
  if (!paramsStr.trim()) {
    return [];
  }

  const openers = '<({[';
  const closers = '>)}]';
  const segments = [];
  let depth = 0;
  let current = '';
  let previous = '';
  for (const ch of paramsStr) {
    if (ch === ',' && depth === 0) {
      segments.push(current);
      current = '';
    } else {
      if (openers.includes(ch)) {
        depth++;
      } else if (closers.includes(ch) && !(ch === '>' && previous === '=')) {
        // The `>` of an arrow (`=>`) in a function type is not a closing
        // bracket; counting it would push depth negative and swallow every
        // later parameter. Clamp at zero for any other stray closer.
        depth = Math.max(0, depth - 1);
      }
      current += ch;
    }
    previous = ch;
  }
  if (current.trim()) {
    segments.push(current);
  }

  // How to unwrap `{ a, b }: Type` and `[a, b]: Type` patterns.
  const destructureRules = [
    { opener: '{', tail: /\}.*$/, closer: '}', brackets: /[{}]/g },
    { opener: '[', tail: /\].*$/, closer: ']', brackets: /[[\]]/g },
  ];

  return segments.flatMap((segment) => {
    const trimmed = segment.trim();
    const rule = destructureRules.find(({ opener }) => trimmed.startsWith(opener));
    if (rule) {
      return trimmed
        .replace(rule.tail, rule.closer) // drop the annotation after the pattern
        .replace(rule.brackets, '')
        .split(',')
        // Keep the key only: strip `...`, `: alias` and `= default`.
        .map((entry) => entry.replace(/^\s*\.\.\./, '').split(/[:=]/)[0].trim())
        .filter(Boolean);
    }
    // Plain or rest parameter: `name?: Type = default` / `...args: T[]`
    const name = trimmed.replace(/^\.\.\./, '').split(/[?:=]/)[0].trim();
    return name ? [name] : [];
  });
}
/**
 * Locate the line that closes a brace-delimited block.
 *
 * Scanning starts at `startLineIndex` and counts `{` / `}` characters; the
 * block ends at the first `}` that brings the count back to zero after at
 * least one `{` has been seen. Braces are counted wherever they appear on a
 * line (string and comment contents are not distinguished).
 *
 * @param {string[]} lines - File content split into lines.
 * @param {number} startLineIndex - 0-indexed line where scanning begins.
 * @returns {number} 1-indexed line number of the closing brace, or
 *   `lines.length` when no matching brace is found.
 */
function findBlockEnd(lines, startLineIndex) {
  let balance = 0;
  let opened = false;
  for (let row = startLineIndex; row < lines.length; row += 1) {
    for (const symbol of lines[row]) {
      switch (symbol) {
        case '{':
          balance += 1;
          opened = true;
          break;
        case '}':
          balance -= 1;
          if (opened && balance === 0) {
            return row + 1; // convert to 1-indexed
          }
          break;
        default:
          break;
      }
    }
  }
  // Unbalanced input: treat the block as running to the end of the file
  return lines.length;
}
/**
 * Parse TypeScript/JavaScript content and extract structural information.
 *
 * The scan is line-oriented and regex-based: a declaration is recognised only
 * when its keyword, name and (for functions) complete parameter list appear
 * on a single line.
 *
 * @param {string} content - Source text to analyse.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   - `functions`: `{ name, startLine, endLine, params, isExported }` (1-indexed lines)
 *   - `classes`: `{ name, startLine, endLine, methods, isExported }`
 *   - `imports`: `{ source, names }` for ES imports and CommonJS requires
 *   - `exports`: unique exported names, in the order they were discovered
 */
export function parseTypeScript(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];
  const seenExports = new Set();

  // Append a name to `exports` once, keeping first-seen order.
  const addExport = (name) => {
    if (!seenExports.has(name)) {
      seenExports.add(name);
      exports.push(name);
    }
  };

  const addFunction = (name, lineIndex, endLine, params, isExported) => {
    functions.push({ name, startLine: lineIndex + 1, endLine, params, isExported });
    if (isExported) {
      addExport(name);
    }
  };

  // "a, b as c" -> ["a", "c"]; `separator` splits an entry from its local alias.
  const localNames = (list, separator) =>
    list
      .split(',')
      .map((entry) => entry.trim().split(separator).pop().trim())
      .filter(Boolean);

  // --- Extract Functions ---
  // (export [default]) (async) function [*] name(params)
  const funcDeclPattern = /^\s*(export\s+default\s+|export\s+)?(?:async\s+)?function(?:\s*\*\s*|\s+)(\w+)\s*\(([^)]*)\)/;
  // (export) const/let/var name = (async) (params) [: ReturnType] =>
  const arrowPattern = /^\s*(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\(([^)]*)\)\s*(?::[^=]*?)?\s*=>/;
  // (export) const/let/var name = (async) param =>
  const bareArrowPattern = /^\s*(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(\w+)\s*=>/;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    const declMatch = line.match(funcDeclPattern);
    if (declMatch) {
      addFunction(
        declMatch[2],
        i,
        findBlockEnd(lines, i),
        extractParamNames(declMatch[3]),
        Boolean(declMatch[1]),
      );
      continue;
    }

    const arrowMatch = line.match(arrowPattern);
    if (arrowMatch) {
      // An expression-bodied arrow ends on its own line; otherwise follow the braces.
      const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
      addFunction(arrowMatch[2], i, endLine, extractParamNames(arrowMatch[3]), Boolean(arrowMatch[1]));
      continue;
    }

    const bareArrowMatch = line.match(bareArrowPattern);
    if (bareArrowMatch) {
      const endLine = line.trim().endsWith('{') ? findBlockEnd(lines, i) : i + 1;
      addFunction(bareArrowMatch[2], i, endLine, [bareArrowMatch[3]], Boolean(bareArrowMatch[1]));
    }
  }

  // --- Extract Classes ---
  const classPattern = /^\s*(export\s+default\s+|export\s+)?(?:abstract\s+)?class\s+(\w+)/;
  // methodName(, async methodName(, get/set methodName(, static methodName(, abstract methodName(
  const methodPattern = /^\s*(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:abstract\s+)?(?:async\s+)?(?:get\s+|set\s+)?(?:\*\s*)?(\w+)\s*\(/;
  // Control-flow keywords that look like method headers
  const controlKeywords = new Set(['if', 'for', 'while', 'switch', 'catch']);

  for (let i = 0; i < lines.length; i++) {
    const classMatch = lines[i].match(classPattern);
    if (!classMatch) {
      continue;
    }
    const isExported = Boolean(classMatch[1]);
    const name = classMatch[2];
    const endLine = findBlockEnd(lines, i);

    // Only lines sitting directly in the class body (brace depth 1) can declare
    // a method; deeper lines are statements such as `super(x)` or `helper(1)`.
    const methods = [];
    let depth = 0;
    for (let j = i; j < endLine; j++) {
      const bodyLine = lines[j];
      // Test before counting this line's braces so `name() {` is seen at depth 1.
      if (depth === 1) {
        const methodMatch = bodyLine.match(methodPattern);
        if (methodMatch && !controlKeywords.has(methodMatch[1]) && !methods.includes(methodMatch[1])) {
          methods.push(methodMatch[1]);
        }
      }
      for (const ch of bodyLine) {
        if (ch === '{') {
          depth++;
        } else if (ch === '}') {
          depth--;
        }
      }
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) {
      addExport(name);
    }
  }

  // --- Extract Imports ---
  // Rules are tried in order; the first pattern that matches a line wins.
  const importRules = [
    // import { a, b } from 'module'
    [
      /^\s*import\s+\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/,
      (m) => ({ source: m[2], names: localNames(m[1], /\s+as\s+/) }),
    ],
    // import name from 'module' (default import)
    [
      /^\s*import\s+(\w+)\s+from\s+['"]([^'"]+)['"]/,
      (m) => ({ source: m[2], names: [m[1]] }),
    ],
    // import * as name from 'module' (namespace import)
    [
      /^\s*import\s+\*\s+as\s+(\w+)\s+from\s+['"]([^'"]+)['"]/,
      (m) => ({ source: m[2], names: [m[1]] }),
    ],
    // import default, { named } from 'module'
    [
      /^\s*import\s+(\w+)\s*,\s*\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/,
      (m) => ({ source: m[3], names: [m[1], ...localNames(m[2], /\s+as\s+/)] }),
    ],
    // import 'module' (side-effect import)
    [
      /^\s*import\s+['"]([^'"]+)['"]/,
      (m) => ({ source: m[1], names: [] }),
    ],
    // const { a, b: c } = require('module') (destructured require)
    [
      /^\s*(?:const|let|var)\s+\{([^}]+)\}\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/,
      (m) => ({ source: m[2], names: localNames(m[1], /\s*:\s*/) }),
    ],
    // const name = require('module') (CommonJS require)
    [
      /^\s*(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/,
      (m) => ({ source: m[2], names: [m[1]] }),
    ],
  ];
  for (const line of lines) {
    for (const [pattern, toImport] of importRules) {
      const match = line.match(pattern);
      if (match) {
        imports.push(toImport(match));
        break;
      }
    }
  }

  // --- Extract Exports ---
  // Keywords after `export default` that start a declaration handled above
  const declarationKeywords = new Set(['function', 'class', 'abstract']);
  for (const line of lines) {
    // export { a, b } or export { a, b } from 'module'
    const namedExportMatch = line.match(/^\s*export\s+\{([^}]+)\}/);
    if (namedExportMatch) {
      for (const name of localNames(namedExportMatch[1], /\s+as\s+/)) {
        addExport(name);
      }
      continue;
    }
    // export default name
    const defaultExportMatch = line.match(/^\s*export\s+default\s+(\w+)\s*;?\s*$/);
    if (defaultExportMatch) {
      if (!declarationKeywords.has(defaultExportMatch[1])) {
        addExport(defaultExportMatch[1]);
      }
      continue;
    }
    // export const/let/var name (arrow functions were already added above)
    const varExportMatch = line.match(/^\s*export\s+(?:const|let|var)\s+(\w+)/);
    if (varExportMatch) {
      addExport(varExportMatch[1]);
    }
  }

  return { functions, classes, imports, exports };
}
/**
 * Measure the leading whitespace of a line.
 * Each space counts as 1 column and each tab as 4; counting stops at the
 * first character that is neither.
 *
 * @param {string} line - A single line of text.
 * @returns {number} Indentation width in columns.
 */
function getIndentation(line) {
  const columnsFor = new Map([
    [' ', 1],
    ['\t', 4],
  ]);
  let width = 0;
  for (const ch of line) {
    const columns = columnsFor.get(ch);
    if (columns === undefined) {
      break;
    }
    width += columns;
  }
  return width;
}
/**
 * Find the end line of an indentation-based block (Python).
 *
 * The block is closed by the first later line that is neither blank nor a
 * `#` comment and whose indentation is the same as or lower than the header
 * line's, or by the end of the file. The reported end is the last non-blank
 * line before that point.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {number} startLineIndex - 0-indexed line of the `def` / `class` header.
 * @returns {number} 1-indexed line number of the block's last line; the header
 *   line itself when nothing but blank lines follows it inside the block.
 */
function findIndentBlockEnd(lines, startLineIndex) {
  const baseIndent = getIndentation(lines[startLineIndex]);
  const isBlankOrComment = (text) => {
    const stripped = text.trim();
    return stripped === '' || stripped.startsWith('#');
  };

  // Index of the first line that belongs to an outer scope (or EOF).
  let stop = lines.length;
  for (let i = startLineIndex + 1; i < lines.length; i++) {
    if (!isBlankOrComment(lines[i]) && getIndentation(lines[i]) <= baseIndent) {
      stop = i;
      break;
    }
  }

  // Walk back over trailing blank lines to the block's last real line.
  for (let j = stop - 1; j > startLineIndex; j--) {
    if (lines[j].trim() !== '') {
      return j + 1; // 1-indexed
    }
  }

  // Nothing but blank lines after the header: the block is the header itself.
  // (This also applies at end of file, so trailing newlines are not counted.)
  return startLineIndex + 1;
}
/**
 * Pull the bare parameter names out of a Python parameter list.
 * Type annotations, default values and the `*` / `**` prefixes are removed;
 * the bare `*` and `/` separators produce no entry.
 * e.g. "self, name: str, age: int = 0, *args, **kwargs" -> ["self", "name", "age", "args", "kwargs"]
 *
 * @param {string} paramsStr - Text found between the parentheses of a `def`.
 * @returns {string[]} Parameter names in declaration order.
 */
function extractPythonParamNames(paramsStr) {
  if (paramsStr.trim() === '') {
    return [];
  }

  // Split on commas that are not nested inside (), [] or {}.
  const pieces = [];
  let buffer = '';
  let nesting = 0;
  for (const ch of paramsStr) {
    if (ch === ',' && nesting === 0) {
      pieces.push(buffer);
      buffer = '';
      continue;
    }
    if ('([{'.includes(ch)) {
      nesting += 1;
    } else if (')]}'.includes(ch)) {
      nesting -= 1;
    }
    buffer += ch;
  }
  if (buffer.trim()) {
    pieces.push(buffer);
  }

  const names = [];
  for (const piece of pieces) {
    // Drop a leading ** or *, then everything from the first ':' or '='.
    const unstarred = piece.trim().replace(/^\*\*/, '').replace(/^\*/, '');
    const name = unstarred.split(/[=:]/)[0].trim();
    if (name !== '' && name !== '/') {
      names.push(name);
    }
  }
  return names;
}
/**
 * Parse Python content and extract structural information.
 *
 * Line endings may be LF or CRLF. Export rules: a class is exported when its
 * name does not start with `_`; a function is exported when it is at
 * indentation 0 and its name does not start with `_`. If an `__all__ = [...]`
 * list is present, its entries replace the computed export list.
 *
 * @param {string} content - Source text to analyse.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   - `functions`: `{ name, startLine, endLine, params, isExported }` for `def`s
 *     outside class bodies (1-indexed lines)
 *   - `classes`: `{ name, startLine, endLine, methods, isExported }`
 *   - `imports`: `{ source, names }`
 *   - `exports`: exported names
 */
export function parsePython(content) {
  // Split on CRLF as well as LF: a trailing "\r" would stop the `(.+)$`
  // import patterns below from matching at all.
  const lines = content.split(/\r?\n/);
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  const addExport = (name) => {
    if (!exports.includes(name)) {
      exports.push(name);
    }
  };
  // Import statements contain no string literals, so everything from `#` is a comment.
  const stripComment = (text) => text.replace(/#.*$/, '');
  // "a, b as c" -> ["a", "c"]
  const importedNames = (list) =>
    list
      .split(',')
      .map((entry) => entry.trim().split(/\s+as\s+/).pop().trim())
      .filter(Boolean);

  // Check for __all__ definition
  let allNames = null;
  const allMatch = content.match(/__all__\s*=\s*\[([^\]]*)\]/s);
  if (allMatch) {
    allNames = allMatch[1]
      .split(',')
      .map((entry) => entry.trim().replace(/^['"]|['"]$/g, ''))
      .filter(Boolean);
  }

  // --- Extract Classes first (to know which line ranges are class bodies) ---
  const classRanges = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // class Name: or class Name(Base):
    const classMatch = line.match(/^(\s*)class\s+(\w+)\s*(?:\([^)]*\))?\s*:/);
    if (!classMatch) {
      continue;
    }
    const classIndent = getIndentation(line);
    const name = classMatch[2];
    const endLine = findIndentBlockEnd(lines, i); // 1-indexed
    const isExported = !name.startsWith('_');

    // Any `def` in the body that is indented deeper than the class header
    const methods = [];
    for (let j = i + 1; j < endLine; j++) {
      const bodyLine = lines[j];
      const methodMatch = bodyLine.match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(/);
      if (methodMatch && getIndentation(bodyLine) > classIndent) {
        methods.push(methodMatch[2]);
      }
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    classRanges.push({ start: i, end: endLine - 1 }); // 0-indexed, inclusive
    if (isExported) {
      addExport(name);
    }
  }

  // --- Extract Functions (skip those inside class bodies) ---
  for (let i = 0; i < lines.length; i++) {
    if (classRanges.some((range) => i > range.start && i <= range.end)) {
      continue;
    }
    const line = lines[i];
    // def name(params): or async def name(params) -> T:
    const funcMatch = line.match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->.*?)?\s*:/);
    if (!funcMatch) {
      continue;
    }
    const name = funcMatch[2];
    // Only top-level, non-underscore functions count as exported
    const isExported = getIndentation(line) === 0 && !name.startsWith('_');
    functions.push({
      name,
      startLine: i + 1,
      endLine: findIndentBlockEnd(lines, i),
      params: extractPythonParamNames(funcMatch[3]),
      isExported,
    });
    if (isExported) {
      addExport(name);
    }
  }

  // --- Extract Imports ---
  for (let i = 0; i < lines.length; i++) {
    const line = stripComment(lines[i]);

    // from module import (name1, name2) — possibly spread over several lines
    const fromParenMatch = line.match(/^\s*from\s+([\w.]+)\s+import\s*\(/);
    if (fromParenMatch) {
      const afterParen = line.substring(line.indexOf('(') + 1);
      let namesStr;
      if (afterParen.includes(')')) {
        namesStr = afterParen.substring(0, afterParen.indexOf(')'));
      } else {
        namesStr = afterParen;
        // Collect continuation lines up to the closing paren
        for (let j = i + 1; j < lines.length; j++) {
          const contLine = stripComment(lines[j]);
          if (contLine.includes(')')) {
            namesStr += contLine.substring(0, contLine.indexOf(')'));
            break;
          }
          namesStr += contLine;
        }
      }
      imports.push({ source: fromParenMatch[1], names: importedNames(namesStr) });
      continue;
    }

    // from module import name1, name2 (single line)
    const fromImportMatch = line.match(/^\s*from\s+([\w.]+)\s+import\s+(.+)$/);
    if (fromImportMatch) {
      const source = fromImportMatch[1];
      const importPart = fromImportMatch[2].trim();
      const names = importPart === '*' ? ['*'] : importedNames(importPart);
      imports.push({ source, names });
      continue;
    }

    // import module / import module as alias / import mod1, mod2
    const importMatch = line.match(/^\s*import\s+(.+)$/);
    if (importMatch) {
      for (const mod of importMatch[1].trim().split(',')) {
        const parts = mod.trim().split(/\s+as\s+/);
        const source = parts[0].trim();
        const alias = parts.length > 1 ? parts[parts.length - 1].trim() : source;
        imports.push({ source, names: [alias] });
      }
    }
  }

  // If __all__ is defined, it is the authoritative export list
  if (allNames) {
    exports.length = 0;
    exports.push(...allNames);
  }

  return { functions, classes, imports, exports };
}
/**
 * Parse Go content and extract structural information.
 *
 * Struct and interface types are reported as classes. A name is considered
 * exported when it starts with an uppercase ASCII letter.
 *
 * @param {string} content - Source text to analyse.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   - `functions`: `{ name, startLine, endLine, params, isExported }` for
 *     functions and methods (1-indexed lines)
 *   - `classes`: `{ name, startLine, endLine, methods, isExported }`
 *   - `imports`: `{ source, names }`, where `names` holds the alias when one
 *     is given, otherwise the last path segment
 *   - `exports`: unique exported names
 */
export function parseGo(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  const addExport = (name) => {
    if (!exports.includes(name)) {
      exports.push(name);
    }
  };
  const isExportedName = (name) => /^[A-Z]/.test(name);
  // Local binding of an import: explicit alias, else last path segment.
  const addImport = (alias, source) => {
    imports.push({ source, names: [alias || source.split('/').pop() || source] });
  };

  // --- Extract Imports ---
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Single import: import "pkg" or import alias "pkg" (alias may be `_` or `.`)
    const singleImport = line.match(/^\s*import\s+(?:([\w.]+)\s+)?"([^"]+)"/);
    if (singleImport) {
      addImport(singleImport[1], singleImport[2]);
      continue;
    }

    // Block import: import ( ... )
    if (/^\s*import\s*\(/.test(line)) {
      for (let j = i + 1; j < lines.length; j++) {
        const entry = lines[j].trim();
        // startsWith, not ===, so `) // comment` still closes the block
        if (entry.startsWith(')')) {
          break;
        }
        const pkgMatch = entry.match(/^(?:([\w.]+)\s+)?"([^"]+)"/);
        if (pkgMatch) {
          addImport(pkgMatch[1], pkgMatch[2]);
        }
      }
    }
  }

  // --- Extract Types (struct/interface as classes) ---
  for (let i = 0; i < lines.length; i++) {
    // type Name struct { / type Name interface { / type Name[T any] struct {
    const typeMatch = lines[i].match(/^\s*type\s+(\w+)\s*(?:\[[^\]]*\])?\s+(?:struct|interface)\s*\{/);
    if (!typeMatch) {
      continue;
    }
    const name = typeMatch[1];
    const endLine = findBlockEnd(lines, i);
    const isExported = isExportedName(name);

    // Indented `name(` lines inside the body (interface method signatures)
    const methods = [];
    for (const bodyLine of lines.slice(i + 1, endLine - 1)) {
      const methodMatch = bodyLine.match(/^\s+(\w+)\s*\(/);
      if (methodMatch) {
        methods.push(methodMatch[1]);
      }
    }

    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) {
      addExport(name);
    }
  }

  // --- Extract Functions ---
  // func Name(params), func (recv) Name(params), func Name[T any](params).
  // The receiver is matched loosely so `(*T)`, `(T)` and `(s *Stack[T])` are accepted.
  const funcPattern = /^\s*func\s+(?:\([^)]*\)\s*)?(\w+)\s*(?:\[[^\]]*\])?\s*\(([^)]*)\)/;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const funcMatch = line.match(funcPattern);
    if (!funcMatch) {
      continue;
    }
    const name = funcMatch[1];
    const isExported = isExportedName(name);
    // Go params are "name type"; the first word of each entry is kept.
    const params = funcMatch[2]
      .split(',')
      .map((param) => param.trim().split(/\s+/)[0])
      .filter(Boolean);
    // Declarations without a body on this line span just this line.
    const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;

    functions.push({ name, startLine: i + 1, endLine, params, isExported });
    if (isExported) {
      addExport(name);
    }
  }

  return { functions, classes, imports, exports };
}
/**
 * Parse Rust content and extract structural information.
 *
 * Structs and enums are reported as classes. Functions found inside an
 * `impl` block are reported as functions and, when the impl's target type
 * was declared in this file, also added to that class's `methods`.
 * An item is exported only when it is declared with a plain `pub`
 * (restricted forms such as `pub(crate)` are recognised but not exported).
 *
 * @param {string} content - Source text to analyse.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   - `functions`: `{ name, startLine, endLine, params, isExported }`, impl
 *     methods first, then free functions (1-indexed lines)
 *   - `classes`: `{ name, startLine, endLine, methods, isExported }`
 *   - `imports`: `{ source, names }` from `use` statements
 *   - `exports`: unique exported names
 */
export function parseRust(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  const addExport = (name) => {
    if (!exports.includes(name)) {
      exports.push(name);
    }
  };
  const isPlainPub = (visibility) => visibility !== undefined && visibility.trim() === 'pub';
  // "Item as Alias" -> "Alias"
  const aliasOf = (text) => text.split(/\s+as\s+/).pop().trim();

  // [pub | pub(...)] [const|async|unsafe|extern "ABI"|default]* fn name[<generics>](params)
  const fnPattern = /^\s*(pub(?:\s*\([^)]*\))?\s+)?(?:(?:default|const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*fn\s+(\w+)\s*(?:<.*?>)?\s*\(([^)]*)\)/;

  // Names of the non-receiver parameters in a parameter list.
  const paramNames = (paramsStr) => {
    // Split on commas outside <>, (), [] and {} so `HashMap<K, V>` stays whole.
    const pieces = [];
    let depth = 0;
    let current = '';
    let previous = '';
    for (const ch of paramsStr) {
      if (ch === ',' && depth === 0) {
        pieces.push(current);
        current = '';
      } else {
        if ('<([{'.includes(ch)) {
          depth++;
        } else if ('>)]}'.includes(ch) && !(ch === '>' && previous === '-')) {
          // `->` is a return arrow, not a closing angle bracket
          depth = Math.max(0, depth - 1);
        }
        current += ch;
      }
      previous = ch;
    }
    pieces.push(current);
    return pieces
      .map((piece) => piece.trim())
      // Skip self receivers (`self`, `mut self`, `&self`, `&mut self`, `self: T`)
      // and any other parameter written with a leading `&`.
      .filter((piece) => piece && !piece.startsWith('&') && !/^(?:mut\s+)?self\b/.test(piece))
      .map((piece) => piece.split(':')[0].replace(/^mut\s+/, '').trim())
      .filter(Boolean);
  };

  // Target type name of an `impl` header line: null when the line is not an
  // impl header, '' when the target is not a plain (optionally generic) path.
  const implTarget = (line) => {
    const header = line.match(/^\s*(?:unsafe\s+)?impl\b(.*)/);
    if (!header) {
      return null;
    }
    // Keep only the part before the body and any where-clause
    let rest = header[1].split('{')[0].split(/\bwhere\b/)[0].trim();
    if (rest.startsWith('<')) {
      // Skip the impl's own generic parameter list: impl<T: Bound> ...
      let depth = 0;
      let k = 0;
      for (; k < rest.length; k++) {
        if (rest[k] === '<') {
          depth++;
        } else if (rest[k] === '>' && rest[k - 1] !== '-') {
          depth--;
          if (depth === 0) {
            break;
          }
        }
      }
      rest = rest.slice(k + 1).trim();
    }
    // `Trait for Type` -> `Type`; then drop generics and the module path
    const subject = rest.split(/\s+for\s+/).pop();
    const name = subject.split('<')[0].trim().split('::').pop().trim();
    return /^\w+$/.test(name) ? name : '';
  };

  // --- Extract Imports (use statements) ---
  for (const line of lines) {
    // use path::to::item; or use path::to::{item1, item2};
    const useMatch = line.match(/^\s*(?:pub\s+)?use\s+(.+);/);
    if (!useMatch) {
      continue;
    }
    const usePath = useMatch[1].trim();
    const groupMatch = usePath.match(/^(.+)::\{([^}]+)\}/);
    if (groupMatch) {
      // Grouped: use std::collections::{HashMap, HashSet};
      const names = groupMatch[2].split(',').map((entry) => aliasOf(entry.trim())).filter(Boolean);
      imports.push({ source: groupMatch[1], names });
    } else {
      // Simple: use std::collections::HashMap;
      const segments = usePath.split('::');
      imports.push({ source: usePath, names: [aliasOf(segments[segments.length - 1])] });
    }
  }

  // --- Extract Structs/Enums as classes ---
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const structMatch = line.match(/^\s*(pub(?:\s*\([^)]*\))?\s+)?(?:struct|enum)\s+(\w+)/);
    if (!structMatch) {
      continue;
    }
    const isExported = isPlainPub(structMatch[1]);
    const name = structMatch[2];
    // Unit/tuple structs have no brace on the declaration line
    const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
    classes.push({ name, startLine: i + 1, endLine, methods: [], isExported });
    if (isExported) {
      addExport(name);
    }
  }

  // --- Extract impl blocks and their methods ---
  // The same header detection feeds both method extraction and the ranges
  // excluded from the free-function scan, so no function is dropped between them.
  const implRanges = [];
  for (let i = 0; i < lines.length; i++) {
    const target = implTarget(lines[i]);
    if (target === null) {
      continue;
    }
    const implEnd = findBlockEnd(lines, i);
    implRanges.push({ start: i, end: implEnd - 1 }); // 0-indexed, inclusive
    const classEntry = classes.find((entry) => entry.name === target);

    for (let j = i + 1; j < implEnd - 1; j++) {
      const bodyLine = lines[j];
      const fnMatch = bodyLine.match(fnPattern);
      if (!fnMatch) {
        continue;
      }
      const fnName = fnMatch[2];
      const fnIsExported = isPlainPub(fnMatch[1]);
      if (classEntry) {
        classEntry.methods.push(fnName);
      }
      functions.push({
        name: fnName,
        startLine: j + 1,
        endLine: bodyLine.includes('{') ? findBlockEnd(lines, j) : j + 1,
        params: paramNames(fnMatch[3]),
        isExported: fnIsExported,
      });
      if (fnIsExported) {
        addExport(fnName);
      }
    }
  }

  // --- Extract standalone functions (not inside impl blocks) ---
  for (let i = 0; i < lines.length; i++) {
    if (implRanges.some((range) => i >= range.start && i <= range.end)) {
      continue;
    }
    const line = lines[i];
    const fnMatch = line.match(fnPattern);
    if (!fnMatch) {
      continue;
    }
    const name = fnMatch[2];
    const isExported = isPlainPub(fnMatch[1]);
    functions.push({
      name,
      startLine: i + 1,
      endLine: line.includes('{') ? findBlockEnd(lines, i) : i + 1,
      params: paramNames(fnMatch[3]),
      isExported,
    });
    if (isExported) {
      addExport(name);
    }
  }

  return { functions, classes, imports, exports };
}
/**
 * Parse Java content and extract structural information.
 *
 * Classes, interfaces and enums are reported as classes. Members are looked
 * for only on lines sitting directly in a type's body (brace depth 1). A type
 * or member is exported when it is declared `public`.
 *
 * @param {string} content - Source text to analyse.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   - `functions`: `{ name, startLine, endLine, params, isExported }` for
 *     members whose full parameter list is on the declaration line (1-indexed lines)
 *   - `classes`: `{ name, startLine, endLine, methods, isExported }`; `methods`
 *     also lists the constructor under the class name
 *   - `imports`: `{ source, names }`
 *   - `exports`: unique exported names
 */
export function parseJava(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  const addExport = (name) => {
    if (!exports.includes(name)) {
      exports.push(name);
    }
  };

  // Control-flow keywords that look like a `type name(` member header
  const controlKeywords = new Set(['if', 'for', 'while', 'switch', 'catch']);

  // [visibility] [static] [final] [abstract] [synchronized] ReturnType name( [params) ]
  // The return type may be package-qualified and may carry nested generics
  // (`Map<String, List<X>>`) and array suffixes. Group 3 is undefined when the
  // parameter list does not close on the same line.
  const memberPattern = /^\s*(?:(public|protected|private)\s+)?(?:static\s+)?(?:final\s+)?(?:abstract\s+)?(?:synchronized\s+)?(?:[\w.]+(?:<.*?>)?(?:\[\])*)\s+(\w+)\s*\((?:([^)]*)\))?/;

  // --- Extract Imports ---
  for (const line of lines) {
    // import package.Class; or import static package.Class.method;
    const importMatch = line.match(/^\s*import\s+(?:static\s+)?([^;]+);/);
    if (importMatch) {
      const source = importMatch[1].trim();
      imports.push({ source, names: [source.split('.').pop()] });
    }
  }

  // --- Extract Classes/Interfaces/Enums and their members ---
  for (let i = 0; i < lines.length; i++) {
    const classMatch = lines[i].match(/^\s*(?:(public|protected|private)\s+)?(?:abstract\s+)?(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+(\w+)/);
    if (!classMatch) {
      continue;
    }
    const name = classMatch[2];
    const endLine = findBlockEnd(lines, i);
    const isExported = classMatch[1] === 'public';
    const methods = [];

    // Register the type (and its export) before its members so the order of
    // `exports` is: type name first, then its public members.
    classes.push({ name, startLine: i + 1, endLine, methods, isExported });
    if (isExported) {
      addExport(name);
    }

    // `name` is a \w+ capture, so it is safe to embed in a pattern.
    const ctorPattern = new RegExp(`^\\s*(?:(?:public|protected|private)\\s+)?${name}\\s*\\(`);

    let depth = 0;
    for (let j = i; j < endLine; j++) {
      const bodyLine = lines[j];

      // Inspect the line BEFORE counting its braces: a declaration such as
      // `void run() {` is still at depth 1 when it starts.
      if (depth === 1) {
        const member = bodyLine.match(memberPattern);
        if (member && !controlKeywords.has(member[2])) {
          const [, memberVisibility, memberName, memberParams] = member;
          if (!methods.includes(memberName)) {
            methods.push(memberName);
          }
          if (memberParams !== undefined) {
            const memberIsExported = memberVisibility === 'public';
            // Java params are "Type name"; keep the last word of each entry.
            const params = memberParams
              .split(',')
              .map((param) => {
                const words = param.trim().split(/\s+/);
                return words.length >= 2 ? words[words.length - 1] : '';
              })
              .filter(Boolean);
            functions.push({
              name: memberName,
              startLine: j + 1,
              endLine: bodyLine.includes('{') ? findBlockEnd(lines, j) : j + 1,
              params,
              isExported: memberIsExported,
            });
            if (memberIsExported) {
              addExport(memberName);
            }
          }
        }
        // Constructors have no return type, so they are matched separately.
        if (ctorPattern.test(bodyLine) && !methods.includes(name)) {
          methods.push(name);
        }
      }

      for (const ch of bodyLine) {
        if (ch === '{') {
          depth++;
        } else if (ch === '}') {
          depth--;
        }
      }
    }
  }

  return { functions, classes, imports, exports };
}
/**
 * Build an import record from the text between `use` and `;`.
 *
 * Handles `Namespace\Class`, `Namespace\Class as Alias` and the
 * `function` / `const` import forms. The alias, when present, is the
 * imported name; otherwise the last backslash-separated segment is used.
 */
function parseGenericUseClause(clause) {
  const target = clause.trim().replace(/^(?:function|const)\s+/, '');
  const aliased = target.match(/^(.*\S)\s+as\s+(\w+)$/);
  const source = aliased ? aliased[1] : target;
  const name = aliased ? aliased[2] : source.split('\\').pop();
  return { source, names: [name] };
}

/**
 * Reduce one raw parameter declaration to its bare name.
 *
 * Everything from the first `=` or `:` on is dropped (default values, type
 * annotations, Ruby keyword-argument markers). Of what remains, the last
 * whitespace-separated token is kept so that leading type hints or modifiers
 * (`string $name`, `mut x`) are discarded, and leading `&`, `*`, `.` and `$`
 * sigils are stripped (`&$out`, `...$rest`, `*args`, `&blk`).
 */
function extractGenericParamName(rawParam) {
  const declaration = rawParam.split(/[=:]/)[0].trim();
  const lastToken = declaration.split(/\s+/).pop();
  return lastToken.replace(/^[&*.$]+/, '');
}

/**
 * Locate the 1-indexed end line of the declaration starting at `startIndex`.
 *
 * A `{` on the declaration line, or a following line that is only `{`, selects
 * brace counting; anything else is treated as a Ruby-style `end`-terminated block.
 */
function findGenericBlockEnd(lines, startIndex) {
  const opensWithBrace = lines[startIndex].includes('{') || lines[startIndex + 1]?.trim() === '{';
  return opensWithBrace ? findBlockEnd(lines, startIndex) : findRubyBlockEnd(lines, startIndex);
}

/**
 * Parse generic content (PHP, Ruby, or unknown languages) using common patterns.
 * Falls back to an empty result if nothing is detected.
 *
 * Every detected class and top-level function is reported as exported.
 *
 * @param {string} content - Full text of the source file.
 * @returns {{functions: Array, classes: Array, imports: Array, exports: string[]}}
 *   Extracted structure; line numbers are 1-indexed.
 */
export function parseGeneric(content) {
  const lines = content.split('\n');
  const functions = [];
  const classes = [];
  const imports = [];
  const exports = [];

  // --- Extract Imports ---
  for (const line of lines) {
    // PHP: use Namespace\Class; (optionally `as Alias`, or `use function` / `use const`)
    const phpUseMatch = line.match(/^\s*use\s+([^;]+);/);
    if (phpUseMatch) {
      imports.push(parseGenericUseClause(phpUseMatch[1]));
      continue;
    }
    // PHP: require/include, with or without parentheses around the path.
    // A plain Ruby `require 'x'` is matched by this pattern as well.
    const phpRequireMatch = line.match(/^\s*(?:require|include)(?:_once)?\s*\(?\s*['"]([^'"]+)['"]/);
    if (phpRequireMatch) {
      const source = phpRequireMatch[1];
      const name = source.split('/').pop()?.replace(/\.\w+$/, '') || source;
      imports.push({ source, names: [name] });
      continue;
    }
    // Ruby: require 'gem' or require_relative 'file'
    const rubyRequireMatch = line.match(/^\s*require(?:_relative)?\s+['"]([^'"]+)['"]/);
    if (rubyRequireMatch) {
      const source = rubyRequireMatch[1];
      const name = source.split('/').pop() || source;
      imports.push({ source, names: [name] });
    }
  }

  // --- Extract Classes ---
  for (let i = 0; i < lines.length; i++) {
    // class Name (PHP, Ruby, etc.)
    const classMatch = lines[i].match(/^\s*(?:(?:public|abstract|final)\s+)*class\s+(\w+)/);
    if (!classMatch) {
      continue;
    }
    const name = classMatch[1];
    const startLine = i + 1;
    const endLine = findGenericBlockEnd(lines, i);
    // Collect method names from the lines between the class header and its end line.
    const methods = [];
    for (let j = i + 1; j < endLine; j++) {
      const methodMatch = lines[j].match(/^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def)\s+(\w+)/);
      if (methodMatch) {
        methods.push(methodMatch[1]);
      }
    }
    classes.push({ name, startLine, endLine, methods, isExported: true });
    if (!exports.includes(name)) {
      exports.push(name);
    }
  }

  // --- Extract Functions ---
  for (let i = 0; i < lines.length; i++) {
    const lineNumber = i + 1;
    // Skip lines strictly inside a class body; those were recorded as methods above.
    const insideClass = classes.some((c) => lineNumber > c.startLine && lineNumber < c.endLine);
    if (insideClass) {
      continue;
    }
    // function name( (PHP), def name( (Ruby/Python-like), fn name( (Rust-like)
    const funcMatch = lines[i].match(/^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def|fn)\s+(\w+)\s*\(([^)]*)\)/);
    if (!funcMatch) {
      continue;
    }
    const name = funcMatch[1];
    const endLine = findGenericBlockEnd(lines, i);
    const params = funcMatch[2]
      .split(',')
      .map(extractGenericParamName)
      .filter(Boolean);
    functions.push({ name, startLine: lineNumber, endLine, params, isExported: true });
    if (!exports.includes(name)) {
      exports.push(name);
    }
  }

  return { functions, classes, imports, exports };
}
/**
 * Locate the line that closes a Ruby-style block.
 *
 * Scans the lines after `startLineIndex` for the first non-blank line whose
 * trimmed text is exactly `end` and whose indentation does not exceed that of
 * the opening line.
 *
 * @param lines - All lines of the file
 * @param startLineIndex - 0-based index of the line that opens the block
 * @returns 1-indexed line number of the matching `end`, or `lines.length`
 *   when no such line exists
 */
function findRubyBlockEnd(lines, startLineIndex) {
  const openerIndent = getIndentation(lines[startLineIndex]);
  const closerIndex = lines.findIndex((candidate, index) => {
    if (index <= startLineIndex) {
      return false;
    }
    const text = candidate.trim();
    if (text === '') {
      return false;
    }
    return getIndentation(candidate) <= openerIndent && text === 'end';
  });
  // No closing `end`: treat the block as running to the end of the file.
  return closerIndex === -1 ? lines.length : closerIndex + 1;
}
/**
 * Reads a source file from disk and extracts its structure with the
 * regex-based parser for the given language (no tree-sitter dependency).
 *
 * @param filePath - Absolute path to the file to parse
 * @param language - Detected language of the file (e.g., 'typescript', 'python')
 * @returns ParseResult with extracted functions, classes, imports, and exports
 */
export function parseFile(filePath, language) {
  const content = readFileSync(filePath, 'utf-8');
  switch (language) {
    case 'typescript':
    case 'javascript':
      return parseTypeScript(content);
    case 'python':
      return parsePython(content);
    case 'go':
      return parseGo(content);
    case 'rust':
      return parseRust(content);
    case 'java':
      return parseJava(content);
    default:
      // Ruby, PHP, and anything else → generic parser
      return parseGeneric(content);
  }
}