File size: 9,938 Bytes
40d7073 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | "use strict";
/**
* Worker Benchmark Suite for RuVector
*
* Measures performance of:
* - ONNX embedding generation (single vs batch)
* - Vector storage and search
* - Phase execution times
* - Worker end-to-end throughput
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.benchmarkEmbeddings = benchmarkEmbeddings;
exports.benchmarkWorkers = benchmarkWorkers;
exports.benchmarkPhases = benchmarkPhases;
exports.formatBenchmarkResults = formatBenchmarkResults;
exports.runFullBenchmark = runFullBenchmark;
const perf_hooks_1 = require("perf_hooks");
const native_worker_1 = require("./native-worker");
const onnx_embedder_1 = require("../core/onnx-embedder");
/**
 * Run a benchmark function multiple times and collect timing statistics.
 *
 * `fn` is first awaited `warmup` times without being measured, then awaited
 * `iterations` times with each call timed via `performance.now()`.
 *
 * @param {string} name - Label copied verbatim into the returned record.
 * @param {Function} fn - Operation to measure; its result is awaited on every call.
 * @param {number} [iterations=10] - Number of timed runs; must be greater than 0.
 * @param {number} [warmup=2] - Number of untimed runs executed before measuring.
 * @returns {Promise<Object>} `{ name, iterations, results }` where `results`
 *   holds `min`, `max`, `avg`, `p50`, `p95` and `p99` in milliseconds.
 * @throws {RangeError} If `iterations` is not greater than 0. Without at least
 *   one sample every statistic would be `undefined` or `NaN`.
 */
async function runBenchmark(name, fn, iterations = 10, warmup = 2) {
    // Written as a negated comparison so that NaN is rejected as well.
    // Checked before the warm-up so an invalid call does no work at all.
    if (!(iterations > 0)) {
        throw new RangeError(`runBenchmark("${name}"): iterations must be greater than 0, got ${iterations}`);
    }
    // Warmup runs (not measured)
    for (let i = 0; i < warmup; i++) {
        await fn();
    }
    // Measured runs
    const times = [];
    for (let i = 0; i < iterations; i++) {
        const start = perf_hooks_1.performance.now();
        await fn();
        times.push(perf_hooks_1.performance.now() - start);
    }
    // Ascending order lets min, max and the percentiles be read by index.
    times.sort((a, b) => a - b);
    const total = times.reduce((acc, t) => acc + t, 0);
    // fraction < 1, so the computed index never exceeds the last sample.
    const percentile = (fraction) => times[Math.floor(times.length * fraction)];
    return {
        name,
        iterations,
        results: {
            min: times[0],
            max: times[times.length - 1],
            avg: total / times.length,
            p50: percentile(0.5),
            p95: percentile(0.95),
            p99: percentile(0.99),
        },
    };
}
/**
 * Benchmark ONNX embedding generation.
 *
 * Initializes the embedder, logs its reported dimension and SIMD flag, then
 * times five scenarios in this order: a short single text, a long single
 * text, and batches of 4, 16 and 64 texts. Batch records additionally carry
 * `throughput.itemsPerSecond`, derived from the average time per batch.
 *
 * @param {number} [iterations=10] - Timed runs per scenario; the 64-text
 *   batch is capped at 5 runs.
 * @returns {Promise<Array>} The five benchmark records, in the order above.
 */
async function benchmarkEmbeddings(iterations = 10) {
    await (0, onnx_embedder_1.initOnnxEmbedder)();
    const embedderInfo = (0, onnx_embedder_1.getStats)();
    console.log(`\nπ ONNX Embedder: ${embedderInfo.dimension}d, SIMD: ${embedderInfo.simd}`);
    const collected = [];
    // Single-text scenarios
    const shortTiming = await runBenchmark('Single embedding (short text)', async () => {
        await (0, onnx_embedder_1.embed)('This is a test sentence for embedding.');
    }, iterations);
    collected.push(shortTiming);
    const longText = 'This is a much longer text that contains more content. '.repeat(20);
    const longTiming = await runBenchmark('Single embedding (long text)', async () => {
        await (0, onnx_embedder_1.embed)(longText);
    }, iterations);
    collected.push(longTiming);
    // Batch scenarios: size, per-sentence suffix and number of timed runs.
    const batchPlans = [
        { size: 4, suffix: '', runs: iterations },
        { size: 16, suffix: ' with some content', runs: iterations },
        // Fewer iterations for large batches
        { size: 64, suffix: ' with additional content here', runs: Math.min(iterations, 5) },
    ];
    for (const { size, suffix, runs } of batchPlans) {
        const texts = Array.from({ length: size }, (_, i) => `Test sentence number ${i}${suffix}`);
        const timing = await runBenchmark(`Batch embedding (${size} texts)`, async () => {
            await (0, onnx_embedder_1.embedBatch)(texts);
        }, runs);
        // avg is milliseconds per batch, so size * 1000 / avg is items per second.
        timing.throughput = {
            itemsPerSecond: (size * 1000) / timing.results.avg,
        };
        collected.push(timing);
    }
    return collected;
}
/**
 * Benchmark end-to-end worker execution.
 *
 * Times the security worker (5 runs) and then the analysis worker (3 runs)
 * against `targetPath`, each after a single warm-up run.
 *
 * @param {string} [targetPath='.'] - Path handed to each worker's `run()`.
 * @returns {Promise<Array>} Two benchmark records: security first, analysis second.
 */
async function benchmarkWorkers(targetPath = '.') {
    const plans = [
        {
            // Security worker (no embeddings - fastest)
            label: 'Security worker (no embeddings)',
            create: () => (0, native_worker_1.createSecurityWorker)(),
            runs: 5,
        },
        {
            // Analysis worker (with embeddings)
            label: 'Analysis worker (with embeddings)',
            create: () => (0, native_worker_1.createAnalysisWorker)(),
            runs: 3,
        },
    ];
    const timings = [];
    for (const { label, create, runs } of plans) {
        // Each worker is created only once the previous benchmark has finished.
        const worker = create();
        const timing = await runBenchmark(label, async () => {
            await worker.run(targetPath);
        }, runs, 1);
        timings.push(timing);
    }
    return timings;
}
/**
 * Benchmark worker phase pipelines of increasing length.
 *
 * Every pipeline starts with file discovery, so the later records measure
 * the listed phase together with the phases configured before it rather
 * than that phase in isolation.
 *
 * @param {string} [targetPath='.'] - Path handed to each worker's `run()`.
 * @returns {Promise<Array>} Three benchmark records: file-discovery,
 *   pattern-extraction, embedding-generation.
 */
async function benchmarkPhases(targetPath = '.') {
    // `timing` holds the trailing runBenchmark arguments: iterations, then
    // an optional warm-up count (omitted entries fall back to its default).
    const plans = [
        {
            label: 'Phase: file-discovery',
            timing: [10],
            workerConfig: {
                name: 'discovery-only',
                phases: [{ type: 'file-discovery' }],
                capabilities: {},
            },
        },
        {
            label: 'Phase: pattern-extraction',
            timing: [5],
            workerConfig: {
                name: 'pattern-only',
                phases: [{ type: 'file-discovery' }, { type: 'pattern-extraction' }],
                capabilities: {},
            },
        },
        {
            label: 'Phase: embedding-generation',
            timing: [3, 1],
            workerConfig: {
                name: 'embedding-only',
                phases: [
                    { type: 'file-discovery', config: { patterns: ['**/*.ts'], exclude: ['**/node_modules/**'] } },
                    { type: 'pattern-extraction' },
                    { type: 'embedding-generation' },
                ],
                capabilities: { onnxEmbeddings: true },
            },
        },
    ];
    const timings = [];
    for (const { label, timing, workerConfig } of plans) {
        // Construct each worker right before its own benchmark.
        const worker = new native_worker_1.NativeWorker(workerConfig);
        const measured = await runBenchmark(label, async () => {
            await worker.run(targetPath);
        }, ...timing);
        timings.push(measured);
    }
    return timings;
}
/**
 * Format benchmark results as a fixed-width, box-drawn table.
 *
 * Borders, header and data rows are all derived from one list of column
 * widths, so every line of the table has the same length. Box-drawing
 * characters are written as `\u` escapes to keep this source file ASCII-only.
 *
 * @param {Array} results - Records with `name`, `results.{min,avg,p95,max}`
 *   (numbers, in ms) and an optional `throughput.itemsPerSecond`.
 * @returns {string} The table, preceded and followed by an empty line.
 */
function formatBenchmarkResults(results) {
    // Content widths: name, min, avg, p95, max, throughput.
    const widths = [35, 8, 8, 8, 8, 12];
    const horizontal = '\u2500'; // light horizontal line
    const vertical = '\u2502'; // light vertical line
    // Each cell has one space of padding on both sides, hence width + 2.
    const rule = (left, junction, right) => left + widths.map((w) => horizontal.repeat(w + 2)).join(junction) + right;
    const row = (cells) => `${vertical} ${cells.join(` ${vertical} `)} ${vertical}`;
    const ms = (value) => value.toFixed(1).padStart(8);
    const headings = ['Benchmark', 'Min (ms)', 'Avg (ms)', 'P95 (ms)', 'Max (ms)', 'Throughput'];
    const lines = [
        '',
        rule('\u250c', '\u252c', '\u2510'), // top border
        row(headings.map((title, i) => title.padEnd(widths[i]))),
        rule('\u251c', '\u253c', '\u2524'), // header separator
    ];
    for (const result of results) {
        const { min, avg, p95, max } = result.results;
        // Names are padded, then cut, so the first column is always 35 wide.
        const name = result.name.padEnd(35).slice(0, 35);
        const throughput = result.throughput
            ? `${result.throughput.itemsPerSecond.toFixed(1)}/s`
            : '-';
        lines.push(row([name, ms(min), ms(avg), ms(p95), ms(max), throughput.padStart(12)]));
    }
    lines.push(rule('\u2514', '\u2534', '\u2518')); // bottom border
    lines.push('');
    return lines.join('\n');
}
/**
 * Run the full benchmark suite and print the results.
 *
 * Runs the embedding, phase and worker benchmarks in that order, printing a
 * formatted table after each, then prints a textual summary. The summary
 * reads embedding records by position (0 = single short text, 3 = batch of
 * 16, 4 = batch of 64), so it relies on the order in which
 * `benchmarkEmbeddings` returns them.
 *
 * @param {string} [targetPath='.'] - Path handed to the phase and worker benchmarks.
 * @returns {Promise<Object>} `{ embeddings, phases, workers, summary }`.
 */
async function runFullBenchmark(targetPath = '.') {
    console.log('π RuVector Native Worker Benchmark Suite\n');
    console.log('='.repeat(60));
    // Embeddings benchmark
    console.log('\nπ Benchmarking ONNX Embeddings...');
    const embeddingRuns = await benchmarkEmbeddings(10);
    console.log(formatBenchmarkResults(embeddingRuns));
    // Phases benchmark
    console.log('\nβ‘ Benchmarking Individual Phases...');
    const phaseRuns = await benchmarkPhases(targetPath);
    console.log(formatBenchmarkResults(phaseRuns));
    // Workers benchmark
    console.log('\nπ§ Benchmarking Full Workers...');
    const workerRuns = await benchmarkWorkers(targetPath);
    console.log(formatBenchmarkResults(workerRuns));
    // Summary: one entry per output line; the empty first and last entries
    // produce the leading and trailing newline.
    const embedderInfo = (0, onnx_embedder_1.getStats)();
    const report = [
        '',
        'RuVector Native Worker Benchmark Summary',
        '========================================',
        `ONNX Model: all-MiniLM-L6-v2 (${embedderInfo.dimension}d)`,
        `SIMD: ${embedderInfo.simd ? 'Enabled β' : 'Disabled'}`,
        `Parallel Workers: ${embedderInfo.parallel ? `${embedderInfo.parallelWorkers} workers` : 'Disabled'}`,
        'Embedding Performance:',
        `Single: ${embeddingRuns[0].results.avg.toFixed(1)}ms avg`,
        `Batch (16): ${embeddingRuns[3].results.avg.toFixed(1)}ms avg (${embeddingRuns[3].throughput?.itemsPerSecond.toFixed(0)}/s)`,
        `Batch (64): ${embeddingRuns[4].results.avg.toFixed(1)}ms avg (${embeddingRuns[4].throughput?.itemsPerSecond.toFixed(0)}/s)`,
        'Worker Performance:',
        `Security scan: ${workerRuns[0].results.avg.toFixed(0)}ms avg`,
        `Full analysis: ${workerRuns[1].results.avg.toFixed(0)}ms avg`,
        '',
    ].join('\n');
    console.log(report);
    return {
        embeddings: embeddingRuns,
        phases: phaseRuns,
        workers: workerRuns,
        summary: report,
    };
}
// Default export: the same five public functions that are also exposed as
// named exports, bundled into a single object.
exports.default = {
benchmarkEmbeddings,
benchmarkWorkers,
benchmarkPhases,
runFullBenchmark,
formatBenchmarkResults,
};
|