mr4's picture
Upload 136 files
fd8cdf5 verified
import { readFileSync } from 'fs';
/**
 * Extract parameter names from a params string, stripping type annotations,
 * default values and rest markers.
 * e.g. "a: string, b: number = 5" -> ["a", "b"]
 *
 * The string is split on top-level commas only: commas nested inside
 * `<>`, `()`, `{}`, `[]` or inside a quoted string literal do not separate
 * parameters. Destructured patterns contribute one name per property/element
 * (for `{ key: alias }` the text before the colon is reported).
 *
 * @param {string} paramsStr - Text found between the parentheses of a signature.
 * @returns {string[]} Parameter names in declaration order.
 */
function extractParamNames(paramsStr) {
    if (!paramsStr.trim())
        return [];
    const params = [];
    let depth = 0;
    let current = '';
    let quote = ''; // the quote character of the string literal we are inside, if any
    let escaped = false; // true when the previous character inside a string was a backslash
    let prev = '';
    for (const ch of paramsStr) {
        if (quote) {
            // Inside a string literal nothing is structural (e.g. sep = ', ').
            current += ch;
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === quote) {
                quote = '';
            }
        }
        else if (ch === '"' || ch === "'" || ch === '`') {
            quote = ch;
            current += ch;
        }
        else if (ch === '<' || ch === '(' || ch === '{' || ch === '[') {
            depth++;
            current += ch;
        }
        else if (ch === '>' && prev === '=') {
            // The `>` of an arrow (`cb: () => void`) is not a closing bracket.
            current += ch;
        }
        else if (ch === '>' || ch === ')' || ch === '}' || ch === ']') {
            // Never go below zero, otherwise every later comma would be ignored.
            depth = Math.max(0, depth - 1);
            current += ch;
        }
        else if (ch === ',' && depth === 0) {
            params.push(current);
            current = '';
        }
        else {
            current += ch;
        }
        prev = ch;
    }
    if (current.trim()) {
        params.push(current);
    }
    // Name of one binding inside a destructuring pattern: drop a leading `...`,
    // then anything from the first `:` (rename/type) or `=` (default) onwards.
    const bindingName = (piece) => piece.replace(/^\s*\.\.\./, '').split(/[:=]/)[0].trim();
    return params.flatMap((p) => {
        const trimmed = p.trim();
        // Handle destructured params like { a, b }: Type
        if (trimmed.startsWith('{')) {
            return trimmed.replace(/\}.*$/, '}').replace(/[{}]/g, '').split(',').map(bindingName).filter(Boolean);
        }
        // Handle array destructured params like [a, b]: Type
        if (trimmed.startsWith('[')) {
            return trimmed.replace(/\].*$/, ']').replace(/[[\]]/g, '').split(',').map(bindingName).filter(Boolean);
        }
        // Plain or rest param: strip `...`, then the optional marker, type and default.
        const name = trimmed.replace(/^\.\.\./, '').split(/[?:=]/)[0].trim();
        return name ? [name] : [];
    });
}
/**
 * Locate the line on which a brace-delimited block closes.
 *
 * Scanning starts at `startLineIndex` (0-indexed) and keeps a running balance
 * of `{` and `}` characters. Every brace character is counted, wherever it
 * appears on the line.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLineIndex - 0-indexed line where scanning begins.
 * @returns {number} 1-indexed line of the `}` that brings the balance back to
 *   zero after at least one `{` was seen, or `lines.length` if there is none.
 */
function findBlockEnd(lines, startLineIndex) {
    let balance = 0;
    let sawOpeningBrace = false;
    let row = startLineIndex;
    while (row < lines.length) {
        // Only brace characters matter, so pull them out in order of appearance.
        const braces = lines[row].match(/[{}]/g) || [];
        for (const brace of braces) {
            if (brace === '{') {
                balance += 1;
                sawOpeningBrace = true;
                continue;
            }
            balance -= 1;
            if (sawOpeningBrace && balance === 0) {
                return row + 1; // convert to a 1-indexed line number
            }
        }
        row += 1;
    }
    // Unbalanced input: report the last line of the file.
    return lines.length;
}
/**
 * Parse TypeScript/JavaScript content and extract structural information.
 *
 * The extraction is line-based and regex-driven:
 * - functions: `function` declarations, `const name = (params) =>` arrows and
 *   single-parameter arrows without parentheses;
 * - classes: `class` declarations plus the method-like members found directly
 *   in the class body (brace depth 1), so call statements inside method bodies
 *   are not reported as methods;
 * - imports: ES `import` forms (including `import type` and inline `type`
 *   specifiers) and CommonJS `require`;
 * - exports: exported functions/classes, `export { ... }`, `export default x`
 *   and `export const/let/var`.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseTypeScript(content) {
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // Keywords that look like `name(` at the start of a line but are not methods.
    const controlKeywords = new Set(['if', 'for', 'while', 'switch', 'catch']);
    // Append an export name once, keeping first-seen order.
    const addExport = (name) => {
        if (!exports.includes(name))
            exports.push(name);
    };
    // Record a function and, when exported, its export name.
    const addFunction = (name, params, isExported, startLine, endLine) => {
        functions.push({ name, startLine, endLine, params, isExported });
        if (isExported)
            addExport(name);
    };
    // Turn "a, b as c, type D" into the local binding names ["a", "c", "D"].
    // The inline `type` modifier is dropped unless the specifier is the
    // identifier `type` itself being aliased (`type as foo`).
    const parseSpecifiers = (list) => list
        .split(',')
        .map((spec) => {
            const parts = spec.trim().replace(/^type\s+(?!as\s+\w)/, '').split(/\s+as\s+/);
            return parts[parts.length - 1].trim();
        })
        .filter(Boolean);
    // --- Extract Functions ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // (export [default]) (async) function (*) name(params)
        const funcDeclMatch = line.match(/^(\s*)(export\s+default\s+|export\s+)?(?:async\s+)?function\s*\*?\s+(\w+)\s*\(([^)]*)\)/);
        if (funcDeclMatch) {
            addFunction(funcDeclMatch[3], extractParamNames(funcDeclMatch[4]), !!funcDeclMatch[2], i + 1, findBlockEnd(lines, i));
            continue;
        }
        // (export) const/let/var name = (async) (params) (: ReturnType) =>
        const arrowMatch = line.match(/^(\s*)(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?::[^=]*?)?\s*=>/);
        if (arrowMatch) {
            // Arrow functions may or may not have a block body; any `{` on the
            // line triggers brace matching, otherwise it is a one-liner.
            const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
            addFunction(arrowMatch[3], extractParamNames(arrowMatch[4]), !!arrowMatch[2], i + 1, endLine);
            continue;
        }
        // Arrow function with no parens for a single param: const name = param =>
        const singleParamArrow = line.match(/^(\s*)(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(\w+)\s*=>/);
        if (singleParamArrow) {
            const endLine = line.trim().endsWith('{') ? findBlockEnd(lines, i) : i + 1;
            addFunction(singleParamArrow[3], [singleParamArrow[4]], !!singleParamArrow[2], i + 1, endLine);
        }
    }
    // --- Extract Classes ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const classMatch = line.match(/^(\s*)(export\s+default\s+|export\s+)?(?:abstract\s+)?class\s+(\w+)/);
        if (!classMatch)
            continue;
        const isExported = !!classMatch[2];
        const name = classMatch[3];
        const startLine = i + 1;
        const endLine = findBlockEnd(lines, i);
        // Walk the class (excluding its closing line) tracking brace depth so
        // that only members declared directly in the class body are matched.
        const methods = [];
        let depth = 0;
        for (let j = i; j < endLine - 1; j++) {
            const bodyLine = lines[j];
            // Test BEFORE counting this line's braces: a member declaration
            // starts while we are still at class-body depth.
            if (j > i && depth === 1) {
                // methodName(, async methodName(, get/set methodName(, static methodName(, abstract methodName(
                const methodMatch = bodyLine.match(/^\s*(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:abstract\s+)?(?:async\s+)?(?:get\s+|set\s+)?(?:\*\s*)?(\w+)\s*\(/);
                if (methodMatch && !controlKeywords.has(methodMatch[1]) && !methods.includes(methodMatch[1])) {
                    methods.push(methodMatch[1]);
                }
            }
            for (const ch of bodyLine) {
                if (ch === '{')
                    depth++;
                else if (ch === '}')
                    depth--;
            }
        }
        classes.push({
            name,
            startLine,
            endLine,
            methods,
            isExported,
        });
        if (isExported)
            addExport(name);
    }
    // --- Extract Imports ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // import { a, b } from 'module'  /  import type { a } from 'module'
        const namedImportMatch = line.match(/^\s*import\s+(?:type\s+)?\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/);
        if (namedImportMatch) {
            imports.push({ source: namedImportMatch[2], names: parseSpecifiers(namedImportMatch[1]) });
            continue;
        }
        // import name from 'module' (default import, optionally type-only)
        const defaultImportMatch = line.match(/^\s*import\s+(?:type\s+)?(\w+)\s+from\s+['"]([^'"]+)['"]/);
        if (defaultImportMatch) {
            imports.push({ source: defaultImportMatch[2], names: [defaultImportMatch[1]] });
            continue;
        }
        // import * as name from 'module' (namespace import)
        const namespaceImportMatch = line.match(/^\s*import\s+(?:type\s+)?\*\s+as\s+(\w+)\s+from\s+['"]([^'"]+)['"]/);
        if (namespaceImportMatch) {
            imports.push({ source: namespaceImportMatch[2], names: [namespaceImportMatch[1]] });
            continue;
        }
        // import default, { named } from 'module'
        const mixedImportMatch = line.match(/^\s*import\s+(\w+)\s*,\s*\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/);
        if (mixedImportMatch) {
            imports.push({
                source: mixedImportMatch[3],
                names: [mixedImportMatch[1], ...parseSpecifiers(mixedImportMatch[2])],
            });
            continue;
        }
        // import 'module' (side-effect import)
        const sideEffectMatch = line.match(/^\s*import\s+['"]([^'"]+)['"]/);
        if (sideEffectMatch) {
            imports.push({ source: sideEffectMatch[1], names: [] });
            continue;
        }
        // const { a, b } = require('module') (destructured require)
        const destructuredRequireMatch = line.match(/^\s*(?:const|let|var)\s+\{([^}]+)\}\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/);
        if (destructuredRequireMatch) {
            // `{ key: local }` binds `local`, so keep the text after the colon.
            const names = destructuredRequireMatch[1].split(',').map((n) => {
                const parts = n.trim().split(/\s*:\s*/);
                return parts[parts.length - 1].trim();
            }).filter(Boolean);
            imports.push({ source: destructuredRequireMatch[2], names });
            continue;
        }
        // const name = require('module') (CommonJS require)
        const requireMatch = line.match(/^\s*(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/);
        if (requireMatch) {
            imports.push({ source: requireMatch[2], names: [requireMatch[1]] });
        }
    }
    // --- Extract Exports ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // export { a, b }, export { a, b } from 'module', export type { A }
        const namedExportMatch = line.match(/^\s*export\s+(?:type\s+)?\{([^}]+)\}/);
        if (namedExportMatch) {
            for (const name of parseSpecifiers(namedExportMatch[1])) {
                addExport(name);
            }
            continue;
        }
        // export default name (function/class declarations are captured above)
        const defaultExportMatch = line.match(/^\s*export\s+default\s+(\w+)\s*;?\s*$/);
        if (defaultExportMatch) {
            const name = defaultExportMatch[1];
            // Skip keywords that start a declaration.
            if (name !== 'function' && name !== 'class' && name !== 'abstract') {
                addExport(name);
            }
            continue;
        }
        // export const/let/var name (arrow functions were already added; addExport dedupes)
        const varExportMatch = line.match(/^\s*export\s+(?:const|let|var)\s+(\w+)/);
        if (varExportMatch) {
            addExport(varExportMatch[1]);
        }
    }
    return { functions, classes, imports, exports };
}
/**
 * Measure the leading whitespace of a line.
 * Each space counts as 1 and each tab as 4; counting stops at the first
 * character that is neither.
 *
 * @param {string} line - A single line of text.
 * @returns {number} Width of the leading indentation.
 */
function getIndentation(line) {
    let width = 0;
    let pos = 0;
    while (pos < line.length) {
        const symbol = line[pos];
        if (symbol !== ' ' && symbol !== '\t') {
            return width;
        }
        width += symbol === '\t' ? 4 : 1;
        pos += 1;
    }
    return width;
}
/**
 * Find where an indentation-delimited block (Python) ends.
 *
 * The block is closed by the first later line that is neither blank nor a
 * `#` comment and whose indentation is less than or equal to that of the
 * header line. The result is the last non-blank line before that point.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLineIndex - 0-indexed line of the `def`/`class` header.
 * @returns {number} 1-indexed last line of the block. When the block has no
 *   body before the closing line, the header line itself; when nothing but
 *   blank lines follow the header up to end of file, `lines.length`.
 */
function findIndentBlockEnd(lines, startLineIndex) {
    const headerIndent = getIndentation(lines[startLineIndex]);
    // 1-indexed number of the last non-blank line strictly between the header
    // and `limit`, or 0 when every line in between is blank.
    const lastContentLineBefore = (limit) => {
        let idx = limit - 1;
        while (idx > startLineIndex && lines[idx].trim() === '') {
            idx -= 1;
        }
        return idx > startLineIndex ? idx + 1 : 0;
    };
    let cursor = startLineIndex + 1;
    while (cursor < lines.length) {
        const text = lines[cursor].trim();
        // Blank and comment-only lines never terminate a block.
        const ignorable = text === '' || text.startsWith('#');
        if (!ignorable && getIndentation(lines[cursor]) <= headerIndent) {
            // Dedent found: the block stopped at the previous non-blank line,
            // or consists of the header alone.
            return lastContentLineBefore(cursor) || startLineIndex + 1;
        }
        cursor += 1;
    }
    // Block runs to end of file: trim trailing blank lines.
    return lastContentLineBefore(lines.length) || lines.length;
}
/**
 * Extract parameter names from a Python params string, stripping type annotations and defaults.
 * e.g. "self, name: str, age: int = 0, *args, **kwargs" -> ["self", "name", "age", "args", "kwargs"]
 *
 * Parameters are separated on top-level commas only: commas nested inside
 * brackets or inside a quoted default value (e.g. `sep=', '`) are ignored.
 * The bare `*` and `/` separators are not parameters and are dropped.
 *
 * @param {string} paramsStr - Text found between the parentheses of a `def`.
 * @returns {string[]} Parameter names in declaration order.
 */
function extractPythonParamNames(paramsStr) {
    if (!paramsStr.trim())
        return [];
    const params = [];
    let depth = 0;
    let current = '';
    let quote = ''; // the quote character of the string literal we are inside, if any
    let escaped = false; // true when the previous character inside a string was a backslash
    for (const ch of paramsStr) {
        if (quote) {
            // Inside a string literal nothing is structural.
            current += ch;
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === quote) {
                quote = '';
            }
        }
        else if (ch === '"' || ch === "'") {
            quote = ch;
            current += ch;
        }
        else if (ch === '(' || ch === '[' || ch === '{') {
            depth++;
            current += ch;
        }
        else if (ch === ')' || ch === ']' || ch === '}') {
            depth--;
            current += ch;
        }
        else if (ch === ',' && depth === 0) {
            params.push(current);
            current = '';
        }
        else {
            current += ch;
        }
    }
    if (current.trim()) {
        params.push(current);
    }
    return params
        .map((p) => {
            // Strip leading * or **, then the annotation (after :) and default (after =).
            const withoutStars = p.trim().replace(/^\*\*/, '').replace(/^\*/, '');
            return withoutStars.split(/[=:]/)[0].trim();
        })
        // A bare `*` becomes '' after stripping; `/` marks positional-only params.
        .filter((name) => name !== '' && name !== '/');
}
/**
 * Parse Python content and extract structural information.
 *
 * - classes: `class Name:` / `class Name(Base):` with the `def`s found in
 *   their indented body reported as methods;
 * - functions: `def` / `async def` outside class bodies;
 * - imports: `import x`, `import x as y`, `from m import a, b` and the
 *   parenthesised multi-line form; trailing `#` comments are ignored;
 * - exports: names not starting with `_` (functions only when at indent 0),
 *   or exactly the contents of `__all__` when the file defines it.
 *
 * Both LF and CRLF line endings are accepted.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parsePython(content) {
    // Split on CRLF as well: a trailing '\r' would defeat the `$`-anchored
    // import patterns below because `.` does not match line terminators.
    const lines = content.split(/\r?\n/);
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // Drop a trailing `# comment` (and the whitespace before it) from import text.
    const stripComment = (text) => text.replace(/\s*#.*$/, '');
    // "a, b as c" -> ["a", "c"]: the name bound locally by each imported item.
    const parseImportedNames = (list) => list
        .split(',')
        .map((n) => {
            const parts = n.trim().split(/\s+as\s+/);
            return parts[parts.length - 1].trim();
        })
        .filter(Boolean);
    // Check for __all__ definition
    let allNames = null;
    const allMatch = content.match(/__all__\s*=\s*\[([^\]]*)\]/s);
    if (allMatch) {
        allNames = allMatch[1]
            .split(',')
            .map((s) => s.trim().replace(/^['"]|['"]$/g, ''))
            .filter(Boolean);
    }
    // --- Extract Classes first (to know which line ranges are class bodies) ---
    const classRanges = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Match class definitions: class Name: or class Name(Base):
        const classMatch = line.match(/^(\s*)class\s+(\w+)\s*(?:\([^)]*\))?\s*:/);
        if (!classMatch)
            continue;
        const classIndent = getIndentation(line);
        const name = classMatch[2];
        const startLine = i + 1; // 1-indexed
        const endLine = findIndentBlockEnd(lines, i);
        const isExported = !name.startsWith('_');
        // Extract methods within the class body. `endLine` is 1-indexed, so
        // `j < endLine` covers 0-indexed lines up to and including the last one.
        const methods = [];
        for (let j = i + 1; j < endLine; j++) {
            const bodyLine = lines[j];
            const methodMatch = bodyLine.match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(/);
            // Method must be indented more than the class
            if (methodMatch && getIndentation(bodyLine) > classIndent) {
                methods.push(methodMatch[2]);
            }
        }
        classes.push({
            name,
            startLine,
            endLine,
            methods,
            isExported,
        });
        classRanges.push({ start: i, end: endLine - 1 }); // 0-indexed range
        if (isExported && !exports.includes(name)) {
            exports.push(name);
        }
    }
    // --- Extract Functions (skip those inside class bodies) ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Skip lines that are inside a class body
        const insideClass = classRanges.some((r) => i > r.start && i <= r.end);
        if (insideClass)
            continue;
        // Match function definitions: def name(params): or async def name(params):
        const funcMatch = line.match(/^(\s*)(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->.*?)?\s*:/);
        if (!funcMatch)
            continue;
        const funcIndent = getIndentation(line);
        const name = funcMatch[2];
        const paramsStr = funcMatch[3];
        const startLine = i + 1; // 1-indexed
        const endLine = findIndentBlockEnd(lines, i);
        // A top-level function (indent 0) that doesn't start with _ is "exported"
        const isExported = funcIndent === 0 && !name.startsWith('_');
        const params = extractPythonParamNames(paramsStr);
        functions.push({
            name,
            startLine,
            endLine,
            params,
            isExported,
        });
        if (isExported && !exports.includes(name)) {
            exports.push(name);
        }
    }
    // --- Extract Imports ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // from module import (name1, name2) — possibly spread over several lines
        const fromMultiLineMatch = line.match(/^\s*from\s+([\w.]+)\s+import\s*\(/);
        if (fromMultiLineMatch) {
            const source = fromMultiLineMatch[1];
            let namesStr = '';
            const afterParen = stripComment(line.substring(line.indexOf('(') + 1));
            if (afterParen.includes(')')) {
                // Single line with parens: from module import (a, b)
                namesStr = afterParen.substring(0, afterParen.indexOf(')'));
            }
            else {
                // Collect names until the closing paren
                namesStr = afterParen;
                for (let j = i + 1; j < lines.length; j++) {
                    // Remove the comment first so a ')' inside it cannot end the list early.
                    const contLine = stripComment(lines[j]);
                    if (contLine.includes(')')) {
                        namesStr += contLine.substring(0, contLine.indexOf(')'));
                        break;
                    }
                    namesStr += contLine;
                }
            }
            imports.push({ source, names: parseImportedNames(namesStr) });
            continue;
        }
        // from module import name1, name2 (single line)
        const fromImportMatch = line.match(/^\s*from\s+([\w.]+)\s+import\s+(.+)$/);
        if (fromImportMatch) {
            const source = fromImportMatch[1];
            const importPart = stripComment(fromImportMatch[2]).trim();
            const names = importPart === '*' ? ['*'] : parseImportedNames(importPart);
            imports.push({ source, names });
            continue;
        }
        // import module or import module as alias or import mod1, mod2
        const importMatch = line.match(/^\s*import\s+(.+)$/);
        if (importMatch) {
            const importPart = stripComment(importMatch[1]).trim();
            for (const mod of importPart.split(',')) {
                const parts = mod.trim().split(/\s+as\s+/);
                const source = parts[0].trim();
                const alias = parts.length > 1 ? parts[parts.length - 1].trim() : source;
                imports.push({ source, names: [alias] });
            }
        }
    }
    // If __all__ is defined, it replaces the inferred exports list
    if (allNames) {
        exports.length = 0;
        exports.push(...allNames);
    }
    return { functions, classes, imports, exports };
}
/**
 * Parse Go content and extract structural information.
 *
 * - imports: single-line and block `import` declarations, with or without an
 *   alias (`name "pkg"`, `_ "pkg"`, `. "pkg"`); the reported name is the alias
 *   when present, otherwise the last path segment;
 * - classes: `type Name struct {` / `type Name interface {`, with lines of the
 *   form `Name(` inside the body reported as methods;
 * - functions: `func Name(params)` and `func (recv Type) Name(params)`;
 * - exports: type and function names starting with an upper-case ASCII letter.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseGo(content) {
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // Record one import: the alias wins, else the last path segment, else the path.
    const addImport = (alias, source) => {
        const name = alias || source.split('/').pop() || source;
        imports.push({ source, names: [name] });
    };
    // --- Extract Imports ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Single import: import "pkg" or import alias "pkg"
        const singleImport = line.match(/^\s*import\s+(?:([\w.]+)\s+)?"([^"]+)"/);
        if (singleImport) {
            addImport(singleImport[1], singleImport[2]);
            continue;
        }
        // Block import: import ( ... )
        if (/^\s*import\s*\(/.test(line)) {
            for (let j = i + 1; j < lines.length; j++) {
                const importLine = lines[j].trim();
                if (importLine === ')')
                    break;
                // Match "pkg" or alias "pkg"
                const pkgMatch = importLine.match(/^(?:([\w.]+)\s+)?"([^"]+)"/);
                if (pkgMatch) {
                    addImport(pkgMatch[1], pkgMatch[2]);
                }
            }
        }
    }
    // --- Extract Types (struct/interface as classes) ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // type Name struct { or type Name interface {
        const typeMatch = line.match(/^\s*type\s+(\w+)\s+(?:struct|interface)\s*\{/);
        if (!typeMatch)
            continue;
        const name = typeMatch[1];
        const startLine = i + 1;
        const endLine = findBlockEnd(lines, i);
        const isExported = /^[A-Z]/.test(name);
        // Extract methods from the body (lines between the header and the closing brace)
        const methods = [];
        for (const bodyLine of lines.slice(i + 1, endLine - 1)) {
            const methodMatch = bodyLine.match(/^\s+(\w+)\s*\(/);
            if (methodMatch) {
                methods.push(methodMatch[1]);
            }
        }
        classes.push({ name, startLine, endLine, methods, isExported });
        if (isExported && !exports.includes(name))
            exports.push(name);
    }
    // --- Extract Functions ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // func (receiver) Name(params) or func Name(params)
        const funcMatch = line.match(/^\s*func\s+(?:\(\s*\w+\s+\*?\w+\s*\)\s+)?(\w+)\s*\(([^)]*)\)/);
        if (!funcMatch)
            continue;
        const name = funcMatch[1];
        const paramsStr = funcMatch[2];
        const startLine = i + 1;
        // A declaration without `{` on its line is treated as a single line.
        const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
        const isExported = /^[A-Z]/.test(name);
        // Go params are "name type": keep the first word of each comma-separated piece.
        const params = paramsStr
            .split(',')
            .map((p) => p.trim().split(/\s+/)[0])
            .filter(Boolean);
        functions.push({ name, startLine, endLine, params, isExported });
        if (isExported && !exports.includes(name))
            exports.push(name);
    }
    return { functions, classes, imports, exports };
}
/**
 * Parse Rust content and extract structural information.
 *
 * - imports: `use` / `pub use` statements, including `{a, b}` groups;
 * - classes: `struct` and `enum` declarations; methods found in `impl` blocks
 *   are attached to the matching entry by type name;
 * - functions: every `fn` inside an `impl` block, then every `fn` outside
 *   impl blocks; simple generic parameter lists (`fn f<T>(...)`) are accepted;
 * - exports: items declared with `pub`.
 *
 * Parameter lists are split on top-level commas only, so generic arguments
 * such as `HashMap<K, V>` stay part of one parameter. `self` receivers and
 * parameters starting with `&` are omitted.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseRust(content) {
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // (pub) (async) fn name(<generics>)(params)
    const fnPattern = /^\s*(pub\s+)?(?:async\s+)?fn\s+(\w+)\s*(?:<[^>]*>)?\s*\(([^)]*)\)/;
    // A line that opens an impl block, with optional simple generics after `impl`.
    const implPrefix = /^\s*impl(?:<[^>]*>)?\s+/;
    // Captures the implementing type: the identifier after an optional
    // `Trait for` part and an optional `path::` prefix.
    const implTarget = /^\s*impl(?:<[^>]*>)?\s+(?:[^{]*\s+for\s+)?(?:\w+::)*(\w+)/;
    // Split a parameter list on top-level commas and reduce each to its name.
    const paramNames = (paramsStr) => {
        const pieces = [];
        let depth = 0;
        let current = '';
        let prev = '';
        for (const ch of paramsStr) {
            if (ch === '<' || ch === '(' || ch === '[' || ch === '{') {
                depth++;
            }
            else if (ch === '>' && prev === '-') {
                // `->` in a fn-pointer type is not a closing bracket.
            }
            else if (ch === '>' || ch === ')' || ch === ']' || ch === '}') {
                depth = Math.max(0, depth - 1);
            }
            if (ch === ',' && depth === 0) {
                pieces.push(current);
                current = '';
            }
            else {
                current += ch;
            }
            prev = ch;
        }
        pieces.push(current);
        return pieces
            .map((p) => p.trim())
            // skip self / &self / &mut self
            .filter((p) => p && !p.startsWith('&') && p !== 'self' && !p.startsWith('mut self'))
            .map((p) => p.split(':')[0].replace(/^mut\s+/, '').trim())
            .filter(Boolean);
    };
    // --- Extract Imports (use statements) ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // use path::to::item; or use path::to::{item1, item2};
        const useMatch = line.match(/^\s*(?:pub\s+)?use\s+(.+);/);
        if (!useMatch)
            continue;
        const usePath = useMatch[1].trim();
        // Handle grouped imports: use std::collections::{HashMap, HashSet};
        const groupMatch = usePath.match(/^(.+)::\{([^}]+)\}/);
        if (groupMatch) {
            const names = groupMatch[2].split(',').map((n) => n.trim().split(/\s+as\s+/).pop().trim()).filter(Boolean);
            imports.push({ source: groupMatch[1], names });
        }
        else {
            // Simple use: use std::collections::HashMap;
            const parts = usePath.split('::');
            const name = parts[parts.length - 1].split(/\s+as\s+/).pop().trim();
            imports.push({ source: usePath, names: [name] });
        }
    }
    // --- Extract Structs/Enums as classes ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // pub struct Name { or struct Name { or pub enum Name { or enum Name {
        const structMatch = line.match(/^\s*(pub\s+)?(?:struct|enum)\s+(\w+)/);
        if (!structMatch)
            continue;
        const isExported = !!structMatch[1];
        const name = structMatch[2];
        const startLine = i + 1;
        const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
        classes.push({ name, startLine, endLine, methods: [], isExported });
        if (isExported && !exports.includes(name))
            exports.push(name);
    }
    // --- Extract impl blocks and their methods ---
    // Every impl block is scanned, whether or not its target type can be
    // resolved, so that the functions it contains are never lost; the same
    // ranges are then used to exclude those lines from the standalone pass.
    const implRanges = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (!implPrefix.test(line))
            continue;
        const implEnd = findBlockEnd(lines, i);
        implRanges.push({ start: i, end: implEnd - 1 });
        // Find the class entry for this impl, if the header names a known type
        const target = line.match(implTarget);
        const classEntry = target ? classes.find((c) => c.name === target[1]) : undefined;
        // Scan the lines strictly between the header and the closing brace
        for (let j = i + 1; j < implEnd - 1; j++) {
            const bodyLine = lines[j];
            const fnMatch = bodyLine.match(fnPattern);
            if (!fnMatch)
                continue;
            const fnIsExported = !!fnMatch[1];
            const fnName = fnMatch[2];
            const fnStartLine = j + 1;
            const fnEndLine = bodyLine.includes('{') ? findBlockEnd(lines, j) : j + 1;
            const params = paramNames(fnMatch[3]);
            if (classEntry) {
                classEntry.methods.push(fnName);
            }
            functions.push({ name: fnName, startLine: fnStartLine, endLine: fnEndLine, params, isExported: fnIsExported });
            if (fnIsExported && !exports.includes(fnName))
                exports.push(fnName);
        }
    }
    // --- Extract standalone functions (not inside impl blocks) ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Skip lines inside impl blocks
        const insideImpl = implRanges.some((r) => i >= r.start && i <= r.end);
        if (insideImpl)
            continue;
        // pub fn name(params) or fn name(params) or pub async fn name(params)
        const fnMatch = line.match(fnPattern);
        if (!fnMatch)
            continue;
        const isExported = !!fnMatch[1];
        const name = fnMatch[2];
        const startLine = i + 1;
        const endLine = line.includes('{') ? findBlockEnd(lines, i) : i + 1;
        const params = paramNames(fnMatch[3]);
        functions.push({ name, startLine, endLine, params, isExported });
        if (isExported && !exports.includes(name))
            exports.push(name);
    }
    return { functions, classes, imports, exports };
}
/**
 * Parse Java content and extract structural information.
 *
 * - imports: `import a.b.C;` and `import static a.b.C.m;` (name = last segment);
 * - classes: `class` / `interface` / `enum` declarations, with the methods and
 *   constructors declared directly in their body (brace depth 1);
 * - functions: those same methods when the whole parameter list sits on the
 *   declaration line (constructors are not included);
 * - exports: types and methods declared `public`.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseJava(content) {
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // public/protected/private class Name, interface Name or enum Name
    const typeDeclaration = /^\s*(?:(public|protected|private)\s+)?(?:abstract\s+)?(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+(\w+)/;
    // "<modifiers> ReturnType name(" — only the name is captured
    const memberName = /^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:final\s+)?(?:abstract\s+)?(?:synchronized\s+)?(?:\w+(?:<[^>]*>)?(?:\[\])*)\s+(\w+)\s*\(/;
    // Same shape, but capturing visibility, name and a parameter list closed on the same line
    const memberSignature = /^\s*(public|protected|private)?\s*(?:static\s+)?(?:final\s+)?(?:abstract\s+)?(?:synchronized\s+)?(?:\w+(?:<[^>]*>)?(?:\[\])*)\s+(\w+)\s*\(([^)]*)\)/;
    // Statement keywords that fit the "Type name(" shape but are not methods
    const statementKeywords = ['if', 'for', 'while', 'switch', 'catch'];
    const isStatementKeyword = (word) => statementKeywords.includes(word);
    // Number of `{` minus number of `}` on a line
    const netBraces = (text) => {
        let delta = 0;
        for (const ch of text) {
            if (ch === '{')
                delta += 1;
            else if (ch === '}')
                delta -= 1;
        }
        return delta;
    };
    const registerExport = (name) => {
        if (!exports.includes(name))
            exports.push(name);
    };
    // --- Extract Imports ---
    for (const text of lines) {
        // import package.Class; or import static package.Class.method;
        const found = text.match(/^\s*import\s+(?:static\s+)?([^;]+);/);
        if (!found)
            continue;
        const source = found[1].trim();
        imports.push({ source, names: [source.split('.').pop()] });
    }
    // --- Extract Classes/Interfaces ---
    for (let idx = 0; idx < lines.length; idx += 1) {
        const declared = lines[idx].match(typeDeclaration);
        if (!declared)
            continue;
        const visibility = declared[1];
        const name = declared[2];
        const startLine = idx + 1;
        const endLine = findBlockEnd(lines, idx);
        const isExported = visibility === 'public';
        // Constructors look like "<visibility> ClassName("
        const constructorPattern = new RegExp(`^\\s*(?:(?:public|protected|private)\\s+)?${name}\\s*\\(`);
        const methods = [];
        const memberFunctions = [];
        // Depth is updated AFTER inspecting each line, so depth === 1 means the
        // line starts directly inside the type body.
        let depth = 0;
        for (let row = idx; row < endLine; row += 1) {
            const text = lines[row];
            if (depth === 1) {
                const named = text.match(memberName);
                if (named && !isStatementKeyword(named[1]) && !methods.includes(named[1])) {
                    methods.push(named[1]);
                }
                // Also match constructors
                if (constructorPattern.test(text) && !methods.includes(name)) {
                    methods.push(name);
                }
                const signature = text.match(memberSignature);
                if (signature && !isStatementKeyword(signature[2])) {
                    // Java params are "Type name": keep the last word of each
                    // comma-separated piece that has at least two words.
                    const params = signature[3]
                        .split(',')
                        .map((piece) => {
                            const words = piece.trim().split(/\s+/);
                            return words.length >= 2 ? words[words.length - 1] : '';
                        })
                        .filter(Boolean);
                    memberFunctions.push({
                        name: signature[2],
                        startLine: row + 1,
                        endLine: text.includes('{') ? findBlockEnd(lines, row) : row + 1,
                        params,
                        isExported: signature[1] === 'public',
                    });
                }
            }
            depth += netBraces(text);
        }
        classes.push({ name, startLine, endLine, methods, isExported });
        if (isExported)
            registerExport(name);
        // Methods are also reported as functions, after the type itself
        for (const fn of memberFunctions) {
            functions.push(fn);
            if (fn.isExported)
                registerExport(fn.name);
        }
    }
    return { functions, classes, imports, exports };
}
/**
 * Parse generic content (PHP, Ruby, or unknown languages) using common patterns.
 * Falls back to an empty result if nothing is detected.
 *
 * - imports: PHP `use`, PHP `require`/`include` (and `_once` variants), Ruby
 *   `require` / `require_relative`;
 * - classes: `class Name` with `function` / `def` members as methods;
 * - functions: `function` / `def` / `fn` declarations outside class bodies.
 *
 * Blocks are measured by brace matching when a `{` is on the declaration line
 * or alone on the next line, otherwise by looking for a Ruby `end`.
 * Every detected class and function is reported as exported.
 *
 * @param {string} content - Full source text of the file.
 * @returns {{functions: object[], classes: object[], imports: object[], exports: string[]}}
 *   Line numbers in the result are 1-indexed.
 */
export function parseGeneric(content) {
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];
    // True when the declaration at `index` opens a brace block (same line or alone on the next).
    const usesBraces = (index) => lines[index].includes('{')
        || (index + 1 < lines.length && lines[index + 1]?.trim() === '{');
    // 1-indexed end line of the block declared at `index`.
    const blockEnd = (index) => (usesBraces(index) ? findBlockEnd(lines, index) : findRubyBlockEnd(lines, index));
    // Reduce one raw parameter to its name.
    // PHP: the `$variable` in the declaration part (before any `=` default),
    //      so type hints, `&` and `...` prefixes are ignored.
    // Otherwise: drop a leading `**`, `*` or `&`, then anything after `=` or `:`.
    const paramName = (raw) => {
        const text = raw.trim();
        const phpVariable = text.split('=')[0].match(/\$(\w+)/);
        if (phpVariable)
            return phpVariable[1];
        return text.replace(/^(?:\*\*|\*|&)/, '').split(/[=:]/)[0].trim();
    };
    // --- Extract Imports ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // PHP: use Namespace\Class;
        const phpUseMatch = line.match(/^\s*use\s+([^;]+);/);
        if (phpUseMatch) {
            const source = phpUseMatch[1].trim();
            const parts = source.split('\\');
            imports.push({ source, names: [parts[parts.length - 1]] });
            continue;
        }
        // PHP: require/include (this also catches plain `require 'x'`)
        const phpRequireMatch = line.match(/^\s*(?:require|include)(?:_once)?\s+['"]([^'"]+)['"]/);
        if (phpRequireMatch) {
            const source = phpRequireMatch[1];
            // File name without its directory and extension
            const name = source.split('/').pop()?.replace(/\.\w+$/, '') || source;
            imports.push({ source, names: [name] });
            continue;
        }
        // Ruby: require 'gem' or require_relative 'file'
        const rubyRequireMatch = line.match(/^\s*require(?:_relative)?\s+['"]([^'"]+)['"]/);
        if (rubyRequireMatch) {
            const source = rubyRequireMatch[1];
            const name = source.split('/').pop() || source;
            imports.push({ source, names: [name] });
        }
    }
    // --- Extract Classes ---
    for (let i = 0; i < lines.length; i++) {
        // class Name (PHP, Ruby, etc.)
        const classMatch = lines[i].match(/^\s*(?:(?:public|abstract|final)\s+)*class\s+(\w+)/);
        if (!classMatch)
            continue;
        const name = classMatch[1];
        const startLine = i + 1;
        const endLine = blockEnd(i);
        // Extract methods from the lines after the header, up to the block's last line
        const methods = [];
        for (let j = i + 1; j < endLine; j++) {
            const methodMatch = lines[j].match(/^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def)\s+(\w+)/);
            if (methodMatch) {
                methods.push(methodMatch[1]);
            }
        }
        classes.push({ name, startLine, endLine, methods, isExported: true });
        if (!exports.includes(name))
            exports.push(name);
    }
    // --- Extract Functions ---
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Skip lines strictly inside class bodies
        const insideClass = classes.some((c) => (i + 1) > c.startLine && (i + 1) < c.endLine);
        if (insideClass)
            continue;
        // function name( (PHP), def name( (Ruby/Python-like), fn name( (Rust-like)
        const funcMatch = line.match(/^\s*(?:(?:public|protected|private)\s+)?(?:static\s+)?(?:function|def|fn)\s+(\w+)\s*\(([^)]*)\)/);
        if (!funcMatch)
            continue;
        const name = funcMatch[1];
        const startLine = i + 1;
        const endLine = blockEnd(i);
        const params = funcMatch[2]
            .split(',')
            .map(paramName)
            .filter(Boolean);
        functions.push({ name, startLine, endLine, params, isExported: true });
        if (!exports.includes(name))
            exports.push(name);
    }
    return { functions, classes, imports, exports };
}
/**
 * Find the end of a Ruby-style block.
 *
 * The block ends at the first later line whose trimmed text is exactly `end`
 * and whose indentation is no deeper than that of the opening line.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLineIndex - 0-indexed line that opens the block.
 * @returns {number} 1-indexed line of the matching `end`, or `lines.length`
 *   when none is found.
 */
function findRubyBlockEnd(lines, startLineIndex) {
    const openerIndent = getIndentation(lines[startLineIndex]);
    let row = startLineIndex + 1;
    while (row < lines.length) {
        const candidate = lines[row];
        // Only a bare `end` can close the block; everything else (blank lines included) is skipped.
        const closesBlock = candidate.trim() === 'end' && getIndentation(candidate) <= openerIndent;
        if (closesBlock) {
            return row + 1; // convert to a 1-indexed line number
        }
        row += 1;
    }
    // No terminator: treat the block as running to the end of the file.
    return lines.length;
}
/**
 * Read a source file from disk and extract its structure with the
 * regex-based parser for the given language (no tree-sitter dependency).
 *
 * @param filePath - Path to the file to parse; it is read synchronously as UTF-8
 * @param language - Detected language of the file (e.g., 'typescript', 'python')
 * @returns ParseResult with extracted functions, classes, imports, and exports
 */
export function parseFile(filePath, language) {
    const source = readFileSync(filePath, 'utf-8');
    switch (language) {
        case 'typescript':
        case 'javascript':
            return parseTypeScript(source);
        case 'python':
            return parsePython(source);
        case 'go':
            return parseGo(source);
        case 'rust':
            return parseRust(source);
        case 'java':
            return parseJava(source);
        default:
            // Ruby, PHP, and anything else → generic parser
            return parseGeneric(source);
    }
}