distill-pipeline / scripts /bench_pipeline.mjs
htaf's picture
Add CI, licences, samples, and benchmark scripts
b2f1284
#!/usr/bin/env node
// scripts/bench_pipeline.mjs
// Quick micro-benchmark for the pipeline using mock providers.
// Measures throughput (questions/sec) over a limited run.
import { performance } from 'perf_hooks';
import path from 'path';
import os from 'os';
import { fileURLToPath } from 'url';
import { runPipelineBatch } from '../src/pipeline/pipeline.mjs';
// ES modules do not define __filename/__dirname, so derive them from this
// module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Parent of the directory that holds this script.
const PROJECT_ROOT = path.resolve(__dirname, '..');
/**
 * Parse the benchmark's command-line options.
 *
 * Recognised options:
 *   --limit N, -n N    returned as `limit` (default 50)
 *   --chunk-limit N    returned as `chunkLimit` (default undefined)
 *   --cache-dir PATH   returned as `cacheDir` (default undefined)
 *   --random-walk      sets `randomWalk` to true
 *
 * Unknown arguments are ignored. When an option's value is missing, blank,
 * not a number (numeric options) or is itself another recognised option
 * (`--cache-dir`), the option keeps its default and the following token is
 * NOT consumed, so `--limit --random-walk` still enables random walk.
 *
 * @param {string[]} argv - Argument vector in `process.argv` layout; the
 *   first two entries are skipped.
 * @returns {{limit: number, chunkLimit: (number|undefined),
 *   cacheDir: (string|undefined), randomWalk: boolean}} Parsed options.
 */
function parseArgs(argv) {
  const args = argv.slice(2);
  const optionNames = new Set([
    '--limit',
    '-n',
    '--chunk-limit',
    '--cache-dir',
    '--random-walk',
  ]);

  // Numeric value of a token, or undefined when the token is absent, blank
  // (Number('') would otherwise yield 0) or not a number.
  const toNumber = (token) => {
    if (typeof token !== 'string' || token.trim() === '') return undefined;
    const value = Number(token);
    return Number.isNaN(value) ? undefined : value;
  };

  let limit = 50;
  let chunkLimit;
  let cacheDir;
  let randomWalk = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const next = args[i + 1];

    if (arg === '--limit' || arg === '-n') {
      const value = toNumber(next);
      if (value !== undefined) {
        limit = value;
        i++; // skip the value we just consumed
      }
    } else if (arg === '--chunk-limit') {
      const value = toNumber(next);
      if (value !== undefined) {
        chunkLimit = value;
        i++;
      }
    } else if (arg === '--cache-dir') {
      // Do not mistake a following option for the directory name.
      if (next !== undefined && !optionNames.has(next)) {
        cacheDir = next;
        i++;
      }
    } else if (arg === '--random-walk') {
      randomWalk = true;
    }
  }

  return { limit, chunkLimit, cacheDir, randomWalk };
}
/**
 * Render a fixed-width ASCII progress bar followed by a percentage.
 *
 * Output shape: `<label> [####....] 50.0%`.
 *
 * @param {string} label - Text placed before the bar.
 * @param {number} fraction - Fill ratio; clamped into [0, 1]. A value that
 *   is not a number (NaN, undefined) is drawn as an empty bar at 0.0%.
 * @param {number} [width=30] - Number of cells inside the brackets.
 * @returns {string} The formatted bar.
 */
function bar(label, fraction, width = 30) {
  const bounded = Math.max(0, Math.min(1, fraction));
  // Math.min/Math.max propagate NaN, which would otherwise print "[] NaN%".
  const clamped = Number.isNaN(bounded) ? 0 : bounded;
  const filled = Math.round(clamped * width);
  const empty = width - filled;
  const cells = '#'.repeat(filled) + '.'.repeat(empty);
  const percent = (clamped * 100).toFixed(1);
  return `${label} [${cells}] ${percent}%`;
}
/**
 * Run one benchmark pass of the pipeline and print a summary.
 *
 * Steps: parse CLI options, override provider/seed/cache environment
 * variables, time a single `runPipelineBatch` call, then report the
 * processed/accepted counts, wall-clock duration and questions per second.
 *
 * @returns {Promise<void>} Resolves once the summary has been printed;
 *   rejects if `runPipelineBatch` rejects.
 */
async function main() {
  const options = parseArgs(process.argv);
  const { limit, chunkLimit, randomWalk } = options;

  // Force mock providers for speed and determinism.
  // NOTE(review): the pipeline module is imported statically at the top of
  // this file, so it is evaluated before these assignments run. Confirm it
  // reads these variables at call time rather than at import time.
  const forcedEnv = {
    GENERATOR_PROVIDER: 'mock',
    VERIFIER_PROVIDER: 'mock',
    REWARD_PROVIDER: 'mock',
    QUESTION_PROVIDER: 'mock',
    PROVIDER_TYPE: 'mock',
    // Seed mode: question-first avoids ES by using rag chunks JSONL
    PIPELINE_SEED_MODE: 'question-first',
  };
  for (const [name, value] of Object.entries(forcedEnv)) {
    process.env[name] = value;
  }

  // Optional random walk over chunks
  if (randomWalk) {
    process.env.PIPELINE_RANDOM_WALK = '1';
  }

  // Isolate cache/output: a fresh temp location unless --cache-dir was given.
  let cachePath = options.cacheDir;
  if (!cachePath) {
    cachePath = path.join(os.tmpdir(), `distill-cache-bench-${Date.now()}`);
  }
  process.env.PIPELINE_CACHE_DIR = cachePath;

  const outPath = path.join(os.tmpdir(), `pipeline_gold_bench_${Date.now()}.jsonl`);

  const headerLines = [
    '🏎️ Benchmarking pipeline (mock providers)',
    ` limit: ${limit}`,
    ` chunkLimit: ${chunkLimit ?? 'default'}`,
    ` randomWalk: ${randomWalk ? 'yes' : 'no'}`,
    ` cache: ${cachePath}`,
    ` out: ${outPath}`,
    '',
  ];
  for (const line of headerLines) {
    console.log(line);
  }

  const startedAt = performance.now();
  const result = await runPipelineBatch({
    limit,
    chunkLimit,
    verbose: false,
    outPath,
    seedMode: 'question-first',
    // Drop the pipeline's regular log output but keep its errors visible.
    logger: { log: () => {}, error: console.error },
  });
  const finishedAt = performance.now();
  const ms = finishedAt - startedAt;

  // Both ratios stay at 0 when nothing was processed (avoids dividing by 0).
  let qps = 0;
  let acceptRatio = 0;
  if (result.processed > 0) {
    qps = (result.processed / ms) * 1000;
    acceptRatio = result.accepted / result.processed;
  }

  const summaryLines = [
    '🎯 Benchmark complete',
    ` mode: ${result.mode}`,
    ` processed: ${result.processed}`,
    ` accepted: ${result.accepted}`,
    ` duration: ${ms.toFixed(1)} ms`,
    ` throughput: ${qps.toFixed(2)} q/s`,
    ` ${bar('accept rate ', acceptRatio)}`,
    // The throughput bar is full at 50 q/s and above.
    ` ${bar('throughput ', Math.min(1, qps / 50))} (normalized vs 50 q/s)`,
    ` cache dir: ${cachePath}`,
    ` out file: ${outPath}`,
  ];
  for (const line of summaryLines) {
    console.log(line);
  }
}
// Script entry point: run the benchmark; on any failure print the error and
// exit with status 1.
const reportFailure = (error) => {
  console.error('Benchmark error:', error);
  process.exit(1);
};

main().catch(reportFailure);