File size: 9,938 Bytes

40d7073

"use strict";
/**
 * Worker Benchmark Suite for RuVector
 *
 * Measures performance of:
 * - ONNX embedding generation (single vs batch)
 * - Vector storage and search
 * - Phase execution times
 * - Worker end-to-end throughput
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.benchmarkEmbeddings = benchmarkEmbeddings;
exports.benchmarkWorkers = benchmarkWorkers;
exports.benchmarkPhases = benchmarkPhases;
exports.formatBenchmarkResults = formatBenchmarkResults;
exports.runFullBenchmark = runFullBenchmark;
const perf_hooks_1 = require("perf_hooks");
const native_worker_1 = require("./native-worker");
const onnx_embedder_1 = require("../core/onnx-embedder");
/**
 * Run a benchmark function multiple times and collect stats
 */
async function runBenchmark(name, fn, iterations = 10, warmup = 2) {
    // Warmup runs
    for (let i = 0; i < warmup; i++) {
        await fn();
    }
    // Actual benchmark runs
    const times = [];
    for (let i = 0; i < iterations; i++) {
        const start = perf_hooks_1.performance.now();
        await fn();
        times.push(perf_hooks_1.performance.now() - start);
    }
    // Calculate statistics
    times.sort((a, b) => a - b);
    const sum = times.reduce((a, b) => a + b, 0);
    return {
        name,
        iterations,
        results: {
            min: times[0],
            max: times[times.length - 1],
            avg: sum / times.length,
            p50: times[Math.floor(times.length * 0.5)],
            p95: times[Math.floor(times.length * 0.95)],
            p99: times[Math.floor(times.length * 0.99)],
        },
    };
}
/**
 * Benchmark ONNX embedding generation
 */
async function benchmarkEmbeddings(iterations = 10) {
    const results = [];
    // Initialize embedder
    await (0, onnx_embedder_1.initOnnxEmbedder)();
    const stats = (0, onnx_embedder_1.getStats)();
    console.log(`\n📊 ONNX Embedder: ${stats.dimension}d, SIMD: ${stats.simd}`);
    // Single embedding benchmark
    const singleResult = await runBenchmark('Single embedding (short text)', async () => {
        await (0, onnx_embedder_1.embed)('This is a test sentence for embedding.');
    }, iterations);
    results.push(singleResult);
    // Single embedding - long text
    const longText = 'This is a much longer text that contains more content. '.repeat(20);
    const singleLongResult = await runBenchmark('Single embedding (long text)', async () => {
        await (0, onnx_embedder_1.embed)(longText);
    }, iterations);
    results.push(singleLongResult);
    // Batch embedding - small batch
    const smallBatch = Array(4).fill(0).map((_, i) => `Test sentence number ${i}`);
    const batchSmallResult = await runBenchmark('Batch embedding (4 texts)', async () => {
        await (0, onnx_embedder_1.embedBatch)(smallBatch);
    }, iterations);
    batchSmallResult.throughput = {
        itemsPerSecond: (4 * 1000) / batchSmallResult.results.avg,
    };
    results.push(batchSmallResult);
    // Batch embedding - medium batch
    const mediumBatch = Array(16).fill(0).map((_, i) => `Test sentence number ${i} with some content`);
    const batchMediumResult = await runBenchmark('Batch embedding (16 texts)', async () => {
        await (0, onnx_embedder_1.embedBatch)(mediumBatch);
    }, iterations);
    batchMediumResult.throughput = {
        itemsPerSecond: (16 * 1000) / batchMediumResult.results.avg,
    };
    results.push(batchMediumResult);
    // Batch embedding - large batch
    const largeBatch = Array(64).fill(0).map((_, i) => `Test sentence number ${i} with additional content here`);
    const batchLargeResult = await runBenchmark('Batch embedding (64 texts)', async () => {
        await (0, onnx_embedder_1.embedBatch)(largeBatch);
    }, Math.min(iterations, 5) // Fewer iterations for large batches
    );
    batchLargeResult.throughput = {
        itemsPerSecond: (64 * 1000) / batchLargeResult.results.avg,
    };
    results.push(batchLargeResult);
    return results;
}
/**
 * Benchmark worker execution
 */
async function benchmarkWorkers(targetPath = '.') {
    const results = [];
    // Security worker (no embeddings - fastest)
    const securityWorker = (0, native_worker_1.createSecurityWorker)();
    const securityResult = await runBenchmark('Security worker (no embeddings)', async () => {
        await securityWorker.run(targetPath);
    }, 5, 1);
    results.push(securityResult);
    // Analysis worker (with embeddings)
    const analysisWorker = (0, native_worker_1.createAnalysisWorker)();
    const analysisResult = await runBenchmark('Analysis worker (with embeddings)', async () => {
        await analysisWorker.run(targetPath);
    }, 3, 1);
    results.push(analysisResult);
    return results;
}
/**
 * Benchmark individual phases
 */
async function benchmarkPhases(targetPath = '.') {
    const results = [];
    // File discovery phase only
    const discoveryWorker = new native_worker_1.NativeWorker({
        name: 'discovery-only',
        phases: [{ type: 'file-discovery' }],
        capabilities: {},
    });
    const discoveryResult = await runBenchmark('Phase: file-discovery', async () => {
        await discoveryWorker.run(targetPath);
    }, 10);
    results.push(discoveryResult);
    // Pattern extraction phase
    const patternWorker = new native_worker_1.NativeWorker({
        name: 'pattern-only',
        phases: [{ type: 'file-discovery' }, { type: 'pattern-extraction' }],
        capabilities: {},
    });
    const patternResult = await runBenchmark('Phase: pattern-extraction', async () => {
        await patternWorker.run(targetPath);
    }, 5);
    results.push(patternResult);
    // Embedding generation phase
    const embeddingWorker = new native_worker_1.NativeWorker({
        name: 'embedding-only',
        phases: [
            { type: 'file-discovery', config: { patterns: ['**/*.ts'], exclude: ['**/node_modules/**'] } },
            { type: 'pattern-extraction' },
            { type: 'embedding-generation' },
        ],
        capabilities: { onnxEmbeddings: true },
    });
    const embeddingResult = await runBenchmark('Phase: embedding-generation', async () => {
        await embeddingWorker.run(targetPath);
    }, 3, 1);
    results.push(embeddingResult);
    return results;
}
/**
 * Format benchmark results as table
 */
function formatBenchmarkResults(results) {
    const lines = [];
    lines.push('');
    lines.push('┌─────────────────────────────────────┬──────────┬──────────┬──────────┬──────────┬──────────────┐');
    lines.push('│ Benchmark                           │ Min (ms) │ Avg (ms) │ P95 (ms) │ Max (ms) │ Throughput   │');
    lines.push('├─────────────────────────────────────┼──────────┼──────────┼──────────┼──────────┼──────────────┤');
    for (const result of results) {
        const name = result.name.padEnd(35).slice(0, 35);
        const min = result.results.min.toFixed(1).padStart(8);
        const avg = result.results.avg.toFixed(1).padStart(8);
        const p95 = result.results.p95.toFixed(1).padStart(8);
        const max = result.results.max.toFixed(1).padStart(8);
        const throughput = result.throughput
            ? `${result.throughput.itemsPerSecond.toFixed(1)}/s`.padStart(12)
            : '           -';
        lines.push(`│ ${name} │ ${min} │ ${avg} │ ${p95} │ ${max} │ ${throughput} │`);
    }
    lines.push('└─────────────────────────────────────┴──────────┴──────────┴──────────┴──────────┴──────────────┘');
    lines.push('');
    return lines.join('\n');
}
/**
 * Run full benchmark suite
 */
async function runFullBenchmark(targetPath = '.') {
    console.log('🚀 RuVector Native Worker Benchmark Suite\n');
    console.log('='.repeat(60));
    // Embeddings benchmark
    console.log('\n📊 Benchmarking ONNX Embeddings...');
    const embeddings = await benchmarkEmbeddings(10);
    console.log(formatBenchmarkResults(embeddings));
    // Phases benchmark
    console.log('\n⚡ Benchmarking Individual Phases...');
    const phases = await benchmarkPhases(targetPath);
    console.log(formatBenchmarkResults(phases));
    // Workers benchmark
    console.log('\n🔧 Benchmarking Full Workers...');
    const workers = await benchmarkWorkers(targetPath);
    console.log(formatBenchmarkResults(workers));
    // Summary
    const stats = (0, onnx_embedder_1.getStats)();
    const summary = `
RuVector Native Worker Benchmark Summary
========================================
ONNX Model: all-MiniLM-L6-v2 (${stats.dimension}d)
SIMD: ${stats.simd ? 'Enabled ✓' : 'Disabled'}
Parallel Workers: ${stats.parallel ? `${stats.parallelWorkers} workers` : 'Disabled'}

Embedding Performance:
  Single: ${embeddings[0].results.avg.toFixed(1)}ms avg
  Batch (16): ${embeddings[3].results.avg.toFixed(1)}ms avg (${embeddings[3].throughput?.itemsPerSecond.toFixed(0)}/s)
  Batch (64): ${embeddings[4].results.avg.toFixed(1)}ms avg (${embeddings[4].throughput?.itemsPerSecond.toFixed(0)}/s)

Worker Performance:
  Security scan: ${workers[0].results.avg.toFixed(0)}ms avg
  Full analysis: ${workers[1].results.avg.toFixed(0)}ms avg
`;
    console.log(summary);
    return { embeddings, phases, workers, summary };
}
exports.default = {
    benchmarkEmbeddings,
    benchmarkWorkers,
    benchmarkPhases,
    runFullBenchmark,
    formatBenchmarkResults,
};