import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as os from 'node:os';
import { scanProject, DEFAULT_IGNORE_DIRS, DEFAULT_IGNORE_FILE_PATTERNS, detectMetadata } from './scanner';
/**
 * Creates a new, uniquely named scratch directory for a test.
 *
 * The directory is placed under `os.tmpdir()` and its name begins with
 * `scanner-test-`; `fs.mkdtempSync` appends the random suffix.
 *
 * @returns {string} Path of the directory that was created.
 */
function createTempDir() {
  const prefix = path.join(os.tmpdir(), 'scanner-test-');
  return fs.mkdtempSync(prefix);
}
/**
 * Deletes a scratch directory together with everything inside it.
 *
 * `force: true` makes the call a no-op when the path does not exist, so it is
 * safe to use in teardown hooks.
 *
 * @param {string} dir - Directory to delete.
 * @returns {void}
 */
function removeTempDir(dir) {
  const removeOptions = { recursive: true, force: true };
  fs.rmSync(dir, removeOptions);
}
/**
 * Writes a UTF-8 text file below `dir`, creating any missing parent folders.
 *
 * @param {string} dir - Base directory the relative path is resolved against.
 * @param {string} relativePath - File location relative to `dir`.
 * @param {string} content - Text to store in the file.
 * @returns {void}
 */
function writeFile(dir, relativePath, content) {
  const target = path.join(dir, relativePath);
  const parentDir = path.dirname(target);
  // Nested fixtures such as `src/utils/helper.ts` need their folders first.
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(target, content, 'utf-8');
}
/**
 * Tests for `scanProject`, run against a throwaway directory tree that is
 * created before and removed after every test.
 */
describe('scanProject', () => {
  let tempDir;

  /** Writes every `[relativePath, content]` pair into the current temp dir. */
  const writeFiles = (entries) => {
    for (const [relativePath, content] of entries) {
      writeFile(tempDir, relativePath, content);
    }
  };

  /** Indexes scanned file entries by their `relativePath`. */
  const indexByPath = (files) => new Map(files.map((file) => [file.relativePath, file]));

  beforeEach(() => {
    tempDir = createTempDir();
  });

  afterEach(() => {
    removeTempDir(tempDir);
  });

  it('should discover files recursively', () => {
    writeFile(tempDir, 'src/index.ts', 'const x = 1;\n');
    writeFile(tempDir, 'src/utils/helper.ts', 'export function help() {}\n');
    writeFile(tempDir, 'README.md', '# Hello\n');

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(3);
    // Sort so the assertion does not depend on directory traversal order.
    const relativePaths = result.files.map((f) => f.relativePath).sort();
    expect(relativePaths).toEqual(['README.md', 'src/index.ts', 'src/utils/helper.ts']);
  });

  it('should exclude default ignored directories', () => {
    writeFile(tempDir, 'src/index.ts', 'code\n');
    // One fixture per directory that the DEFAULT_IGNORE_DIRS test requires.
    writeFiles([
      ['node_modules/pkg/index.js', 'module\n'],
      ['.git/config', 'git config\n'],
      ['dist/bundle.js', 'bundle\n'],
      ['build/output.js', 'output\n'],
      ['vendor/lib/dep.js', 'vendored\n'],
      ['__pycache__/mod.pyc', 'cache\n'],
      ['.next/static/chunk.js', 'next\n'],
      ['.cache/data', 'cache\n'],
      ['.turbo/cache', 'turbo\n'],
      ['target/debug/main', 'rust\n'],
      ['obj/Debug/file.dll', 'dotnet\n'],
    ]);

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(1);
    expect(result.files[0].relativePath).toBe('src/index.ts');
  });

  it('should exclude files matching default ignore patterns', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');
    writeFiles([
      ['package-lock.lock', 'lock\n'],
      ['assets/bundle.min.js', 'minified\n'],
      ['assets/style.min.css', 'minified\n'],
      ['assets/app.map', 'sourcemap\n'],
      ['images/logo.png', 'binary\n'],
      ['images/photo.jpg', 'binary\n'],
      ['fonts/font.woff2', 'binary\n'],
      ['media/video.mp4', 'binary\n'],
      ['archive.zip', 'binary\n'],
      ['doc.pdf', 'binary\n'],
      ['bin/app.exe', 'binary\n'],
    ]);

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(1);
    expect(result.files[0].relativePath).toBe('src/app.ts');
  });

  it('should read and apply .understandignore patterns', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');
    writeFile(tempDir, 'src/generated/api.ts', 'generated\n');
    writeFile(tempDir, 'tests/fixture.ts', 'test\n');
    writeFile(tempDir, '.understand-anything/.understandignore', '# Comment\nsrc/generated/**\ntests/**\n');

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(1);
    expect(result.files[0].relativePath).toBe('src/app.ts');
  });

  it('should handle missing .understandignore gracefully', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(1);
  });

  it('should detect language from file extension', () => {
    // Each row: [file name, file content, expected language].
    const cases = [
      ['app.ts', 'ts\n', 'typescript'],
      ['app.py', 'py\n', 'python'],
      ['app.go', 'go\n', 'go'],
      ['app.rs', 'rs\n', 'rust'],
      ['app.java', 'java\n', 'java'],
      ['app.rb', 'rb\n', 'ruby'],
      ['app.php', 'php\n', 'php'],
      ['app.cs', 'cs\n', 'csharp'],
      ['app.cpp', 'cpp\n', 'cpp'],
      ['app.c', 'c\n', 'c'],
      ['app.swift', 'swift\n', 'swift'],
      ['app.kt', 'kotlin\n', 'kotlin'],
      ['app.sh', 'shell\n', 'shell'],
      ['app.sql', 'sql\n', 'sql'],
      ['app.html', 'html\n', 'html'],
      ['app.css', 'css\n', 'css'],
      ['app.json', 'json\n', 'json'],
      ['app.yaml', 'yaml\n', 'yaml'],
      ['app.md', 'md\n', 'markdown'],
      ['app.toml', 'toml\n', 'toml'],
      ['app.graphql', 'graphql\n', 'graphql'],
      ['app.proto', 'protobuf\n', 'protobuf'],
      ['app.tf', 'terraform\n', 'terraform'],
      ['Dockerfile', 'FROM node\n', 'dockerfile'],
    ];
    writeFiles(cases.map(([file, content]) => [file, content]));

    const byPath = indexByPath(scanProject(tempDir).files);

    // Compare as one object so a failure lists every mismatching file at once;
    // a file missing from the scan shows up as `undefined`.
    const detected = Object.fromEntries(cases.map(([file]) => [file, byPath.get(file)?.language]));
    const expected = Object.fromEntries(cases.map(([file, , language]) => [file, language]));
    expect(detected).toEqual(expected);
  });

  it('should detect file categories correctly', () => {
    // Each row: [relative path, file content, expected category].
    const cases = [
      ['src/app.ts', 'code\n', 'code'],
      ['config.json', '{}', 'config'],
      ['README.md', '# Docs\n', 'docs'],
      ['main.tf', 'resource\n', 'infra'],
      ['Dockerfile', 'FROM node\n', 'infra'],
      ['schema.sql', 'CREATE TABLE\n', 'data'],
      ['schema.graphql', 'type Query\n', 'data'],
      ['deploy.sh', '#!/bin/bash\n', 'script'],
      ['index.html', '\n', 'markup'],
      ['style.css', 'body {}\n', 'markup'],
    ];
    writeFiles(cases.map(([file, content]) => [file, content]));

    const byPath = indexByPath(scanProject(tempDir).files);

    const detected = Object.fromEntries(cases.map(([file]) => [file, byPath.get(file)?.category]));
    const expected = Object.fromEntries(cases.map(([file, , category]) => [file, category]));
    expect(detected).toEqual(expected);
  });

  it('should count lines correctly', () => {
    // None of these fixtures ends with a trailing newline.
    writeFile(tempDir, 'one-line.ts', 'const x = 1;');
    writeFile(tempDir, 'three-lines.ts', 'line1\nline2\nline3');
    writeFile(tempDir, 'empty.ts', '');

    const byPath = indexByPath(scanProject(tempDir).files);

    expect(byPath.get('one-line.ts')?.lineCount).toBe(1);
    expect(byPath.get('three-lines.ts')?.lineCount).toBe(3);
    expect(byPath.get('empty.ts')?.lineCount).toBe(0);
  });

  it('should apply additional ignore patterns passed as argument', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');
    writeFile(tempDir, 'src/test.spec.ts', 'test\n');
    writeFile(tempDir, 'docs/guide.md', 'guide\n');

    const result = scanProject(tempDir, ['*.spec.ts', 'docs/**']);

    expect(result.files).toHaveLength(1);
    expect(result.files[0].relativePath).toBe('src/app.ts');
  });

  it('should return default metadata placeholder', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');

    const result = scanProject(tempDir);

    // No manifest files in temp dir, so name/description are empty
    expect(result.metadata.name).toBe('');
    expect(result.metadata.description).toBe('');
    // Languages are detected from scanned files
    expect(result.metadata.languages).toEqual(['typescript']);
    expect(result.metadata.analyzedAt).toBeTruthy();
  });

  it('should handle empty directories', () => {
    // Just the temp dir with no files
    const result = scanProject(tempDir);

    expect(result.files).toEqual([]);
  });

  it('should handle .understandignore with comments and blank lines', () => {
    writeFile(tempDir, 'src/app.ts', 'code\n');
    writeFile(tempDir, 'src/secret.ts', 'secret\n');
    writeFile(tempDir, '.understand-anything/.understandignore', '# This is a comment\n\n \nsrc/secret.ts\n\n# Another comment\n');

    const result = scanProject(tempDir);

    expect(result.files).toHaveLength(1);
    expect(result.files[0].relativePath).toBe('src/app.ts');
  });

  it('should detect docker-compose.yml as infra', () => {
    writeFile(tempDir, 'docker-compose.yml', 'version: "3"\n');

    const result = scanProject(tempDir);

    // Assert the length first so a missing entry fails as an assertion rather
    // than as a TypeError on `files[0]`.
    expect(result.files).toHaveLength(1);
    expect(result.files[0].category).toBe('infra');
  });
});
// Guards the exported default directory ignore list against accidental removals.
describe('DEFAULT_IGNORE_DIRS', () => {
  it('should include all required directories', () => {
    const expectedDirs = [
      'node_modules',
      '.git',
      'dist',
      'build',
      'vendor',
      '__pycache__',
      '.next',
      '.cache',
      '.turbo',
      'target',
      'obj',
    ];

    // Checked one by one so a failure names the missing directory.
    expectedDirs.forEach((name) => {
      expect(DEFAULT_IGNORE_DIRS).toContain(name);
    });
  });
});
// Guards the exported default file-pattern ignore list against accidental removals.
describe('DEFAULT_IGNORE_FILE_PATTERNS', () => {
  it('should include all required patterns', () => {
    // Grouped by kind of artifact; the groups are flattened before checking.
    const generated = ['*.lock', '*.min.js', '*.min.css', '*.map'];
    const images = ['*.png', '*.jpg', '*.jpeg', '*.gif', '*.svg', '*.ico'];
    const fonts = ['*.woff', '*.woff2', '*.ttf', '*.eot'];
    const media = ['*.mp3', '*.mp4', '*.webm'];
    const archives = ['*.zip', '*.tar', '*.gz'];
    const binaries = ['*.pdf', '*.exe', '*.dll', '*.so', '*.dylib'];
    const expectedPatterns = [...generated, ...images, ...fonts, ...media, ...archives, ...binaries];

    // Checked one by one so a failure names the missing pattern.
    expectedPatterns.forEach((glob) => {
      expect(DEFAULT_IGNORE_FILE_PATTERNS).toContain(glob);
    });
  });
});
/**
 * Tests for `detectMetadata(projectDir, files)`.
 *
 * Covers manifest parsing (package.json, pyproject.toml, Cargo.toml, go.mod,
 * pom.xml), language ranking from file entries, framework detection, the git
 * commit hash fallback, the `analyzedAt` timestamp, and the wiring into
 * `scanProject`. Every test gets its own temp directory.
 *
 * Multi-line fixtures are template literals whose content starts at column 0
 * on purpose: that whitespace is part of the file written to disk.
 */
describe('detectMetadata', () => {
  let tempDir;

  beforeEach(() => {
    tempDir = createTempDir();
  });

  afterEach(() => {
    removeTempDir(tempDir);
  });

  describe('package.json detection', () => {
    it('should extract name and description from package.json', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'my-project',
        description: 'A test project',
      }));
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('my-project');
      expect(result.description).toBe('A test project');
    });

    it('should detect frameworks from dependencies', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'my-app',
        dependencies: { react: '^18.0.0', next: '^14.0.0' },
        devDependencies: { typescript: '^5.0.0' },
      }));
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('React');
      expect(result.frameworks).toContain('Next.js');
    });

    it('should detect express framework', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'api-server',
        dependencies: { express: '^4.18.0' },
      }));
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('Express');
    });

    it('should detect vue framework', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'vue-app',
        dependencies: { vue: '^3.0.0' },
      }));
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('Vue');
    });

    it('should detect nestjs framework', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'nest-app',
        dependencies: { '@nestjs/core': '^10.0.0' },
      }));
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('NestJS');
    });

    it('should handle invalid package.json gracefully', () => {
      // Malformed JSON must not throw; metadata falls back to empty strings.
      writeFile(tempDir, 'package.json', 'not valid json {{{');
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('');
      expect(result.description).toBe('');
    });
  });

  describe('pyproject.toml detection', () => {
    it('should extract name and description from pyproject.toml', () => {
      writeFile(tempDir, 'pyproject.toml', `
[project]
name = "my-python-project"
description = "A Python project"
dependencies = []
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('my-python-project');
      expect(result.description).toBe('A Python project');
    });

    it('should detect Django framework from pyproject.toml', () => {
      writeFile(tempDir, 'pyproject.toml', `
[project]
name = "django-app"
description = "A Django app"
dependencies = [
"django>=4.0",
"djangorestframework",
]
`);
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('Django');
    });

    it('should detect FastAPI framework from pyproject.toml', () => {
      writeFile(tempDir, 'pyproject.toml', `
[project]
name = "fastapi-app"
description = "A FastAPI app"
dependencies = [
"fastapi[all]>=0.100.0",
"uvicorn",
]
`);
      const result = detectMetadata(tempDir, []);
      expect(result.frameworks).toContain('FastAPI');
    });

    it('should not use pyproject.toml if package.json exists', () => {
      // Both manifests are present; the package.json name is expected to win.
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'js-project',
        description: 'JS project',
      }));
      writeFile(tempDir, 'pyproject.toml', `
[project]
name = "py-project"
description = "Python project"
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('js-project');
    });
  });

  describe('Cargo.toml detection', () => {
    it('should extract name and description from Cargo.toml', () => {
      writeFile(tempDir, 'Cargo.toml', `
[package]
name = "my-rust-crate"
description = "A Rust crate"
version = "0.1.0"
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('my-rust-crate');
      expect(result.description).toBe('A Rust crate');
    });
  });

  describe('go.mod detection', () => {
    it('should extract module name from go.mod', () => {
      writeFile(tempDir, 'go.mod', `module github.com/user/my-go-project
go 1.21
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('github.com/user/my-go-project');
    });
  });

  describe('pom.xml detection', () => {
    it('should extract name from pom.xml', () => {
      // The fixture must be real XML: the assertions below depend on <name>
      // and <description> being distinguishable from <groupId>/<artifactId>.
      writeFile(tempDir, 'pom.xml', `
<project>
  <groupId>com.example</groupId>
  <artifactId>my-java-app</artifactId>
  <name>My Java App</name>
  <description>A Java application</description>
</project>
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('My Java App');
      expect(result.description).toBe('A Java application');
    });

    it('should fall back to artifactId if name is missing', () => {
      // No <name> element here, so the artifactId is the expected project name.
      writeFile(tempDir, 'pom.xml', `
<project>
  <groupId>com.example</groupId>
  <artifactId>my-java-app</artifactId>
</project>
`);
      const result = detectMetadata(tempDir, []);
      expect(result.name).toBe('my-java-app');
    });
  });

  describe('language detection from files', () => {
    it('should detect languages from file entries sorted by frequency', () => {
      // Three TypeScript files, then one Python and one Go file.
      const files = [
        { path: '/p/a.ts', relativePath: 'a.ts', category: 'code', language: 'typescript', lineCount: 10 },
        { path: '/p/b.ts', relativePath: 'b.ts', category: 'code', language: 'typescript', lineCount: 20 },
        { path: '/p/c.ts', relativePath: 'c.ts', category: 'code', language: 'typescript', lineCount: 5 },
        { path: '/p/d.py', relativePath: 'd.py', category: 'code', language: 'python', lineCount: 15 },
        { path: '/p/e.go', relativePath: 'e.go', category: 'code', language: 'go', lineCount: 8 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.languages[0]).toBe('typescript');
      expect(result.languages[1]).toBe('python');
      expect(result.languages[2]).toBe('go');
    });

    it('should exclude unknown language files', () => {
      const files = [
        { path: '/p/a.ts', relativePath: 'a.ts', category: 'code', language: 'typescript', lineCount: 10 },
        { path: '/p/b.xyz', relativePath: 'b.xyz', category: 'code', language: 'unknown', lineCount: 5 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.languages).toEqual(['typescript']);
    });

    it('should include all languages with at least 1 file', () => {
      const files = [
        { path: '/p/a.ts', relativePath: 'a.ts', category: 'code', language: 'typescript', lineCount: 10 },
        { path: '/p/b.json', relativePath: 'b.json', category: 'config', language: 'json', lineCount: 5 },
        { path: '/p/c.md', relativePath: 'c.md', category: 'docs', language: 'markdown', lineCount: 3 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.languages).toContain('typescript');
      expect(result.languages).toContain('json');
      expect(result.languages).toContain('markdown');
    });
  });

  describe('framework detection from file patterns', () => {
    // The '/p/...' entries below are never written to disk; only the file
    // entry itself is supplied to detectMetadata.
    it('should detect Next.js from next.config.mjs', () => {
      const files = [
        { path: '/p/next.config.mjs', relativePath: 'next.config.mjs', category: 'config', language: 'javascript', lineCount: 5 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Next.js');
    });

    it('should detect Astro from astro.config.mjs', () => {
      const files = [
        { path: '/p/astro.config.mjs', relativePath: 'astro.config.mjs', category: 'config', language: 'javascript', lineCount: 5 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Astro');
    });

    it('should detect TailwindCSS from tailwind.config.js', () => {
      const files = [
        { path: '/p/tailwind.config.js', relativePath: 'tailwind.config.js', category: 'config', language: 'javascript', lineCount: 5 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('TailwindCSS');
    });

    it('should detect Vite from vite.config.ts', () => {
      const files = [
        { path: '/p/vite.config.ts', relativePath: 'vite.config.ts', category: 'config', language: 'typescript', lineCount: 5 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Vite');
    });

    it('should detect Django from manage.py', () => {
      const files = [
        { path: '/p/manage.py', relativePath: 'manage.py', category: 'code', language: 'python', lineCount: 10 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Django');
    });

    it('should detect Flask from app.py imports', () => {
      // Unlike the tests above, this file exists on disk and its entry points
      // at the real location.
      writeFile(tempDir, 'app.py', 'from flask import Flask\napp = Flask(__name__)\n');
      const files = [
        { path: path.join(tempDir, 'app.py'), relativePath: 'app.py', category: 'code', language: 'python', lineCount: 2 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Flask');
    });

    it('should detect FastAPI from app.py imports', () => {
      writeFile(tempDir, 'app.py', 'from fastapi import FastAPI\napp = FastAPI()\n');
      const files = [
        { path: path.join(tempDir, 'app.py'), relativePath: 'app.py', category: 'code', language: 'python', lineCount: 2 },
      ];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('FastAPI');
    });

    it('should detect Ruby on Rails from Gemfile', () => {
      // The Gemfile is on disk but deliberately absent from the file list.
      writeFile(tempDir, 'Gemfile', "source 'https://rubygems.org'\ngem 'rails', '~> 7.0'\n");
      const files = [];
      const result = detectMetadata(tempDir, files);
      expect(result.frameworks).toContain('Ruby on Rails');
    });
  });

  describe('git commit hash', () => {
    it('should return empty string for non-git directories', () => {
      const result = detectMetadata(tempDir, []);
      expect(result.gitCommitHash).toBe('');
    });
  });

  describe('analyzedAt', () => {
    it('should set analyzedAt to a valid ISO timestamp', () => {
      // The bounds are `toISOString()` values and are compared as strings;
      // this assumes analyzedAt uses the same format.
      const before = new Date().toISOString();
      const result = detectMetadata(tempDir, []);
      const after = new Date().toISOString();
      expect(result.analyzedAt).toBeTruthy();
      expect(result.analyzedAt >= before).toBe(true);
      expect(result.analyzedAt <= after).toBe(true);
    });
  });

  describe('integration with scanProject', () => {
    it('should populate metadata when scanning a project with package.json', () => {
      writeFile(tempDir, 'package.json', JSON.stringify({
        name: 'test-project',
        description: 'Test description',
        dependencies: { react: '^18.0.0' },
      }));
      writeFile(tempDir, 'src/index.tsx', 'export default function App() {}\n');
      writeFile(tempDir, 'src/utils.ts', 'export const x = 1;\n');
      const result = scanProject(tempDir);
      expect(result.metadata.name).toBe('test-project');
      expect(result.metadata.description).toBe('Test description');
      expect(result.metadata.languages).toContain('typescript');
      expect(result.metadata.frameworks).toContain('React');
      expect(result.metadata.analyzedAt).toBeTruthy();
    });
  });
});