#!/usr/bin/env node
// scripts/bench_pipeline.mjs
// Quick micro-benchmark for the pipeline using mock providers.
// Measures throughput (questions/sec) over a limited run.

import { performance } from 'perf_hooks';
import path from 'path';
import os from 'os';
import { fileURLToPath } from 'url';
import { runPipelineBatch } from '../src/pipeline/pipeline.mjs';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const PROJECT_ROOT = path.join(__dirname, '..');

// Every flag this script understands. Used so that a value-taking flag never
// swallows a following flag as if it were its value.
const KNOWN_FLAGS = new Set([
  '--limit',
  '-n',
  '--chunk-limit',
  '--cache-dir',
  '--random-walk',
]);

/**
 * Parse the benchmark's command-line options.
 *
 * Recognized flags:
 *   --limit, -n <number>    number passed as `limit` (default 50)
 *   --chunk-limit <number>  number passed as `chunkLimit` (default undefined)
 *   --cache-dir <path>      cache directory override (default undefined)
 *   --random-walk           boolean switch
 *
 * Unknown arguments are ignored. A numeric flag whose value does not parse
 * as a number keeps its default. A value-taking flag that is immediately
 * followed by another known flag (or by nothing) is treated as having no
 * value, and the following flag is still processed.
 *
 * @param {string[]} argv - Full argument vector, e.g. `process.argv`; the
 *   first two entries are skipped.
 * @returns {{limit: number, chunkLimit: (number|undefined), cacheDir: (string|undefined), randomWalk: boolean}}
 */
function parseArgs(argv) {
  const args = argv.slice(2);
  let limit = 50;
  let chunkLimit;
  let cacheDir;
  let randomWalk = false;

  // Value token following position `i`, or undefined when it is missing or
  // is itself one of our flags.
  const valueAfter = (i) => {
    const next = args[i + 1];
    return next === undefined || KNOWN_FLAGS.has(next) ? undefined : next;
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    if (flag === '--limit' || flag === '-n') {
      const raw = valueAfter(i);
      if (raw !== undefined) {
        const v = Number(raw);
        if (!Number.isNaN(v)) limit = v;
        i++; // consume the value token
      }
    } else if (flag === '--chunk-limit') {
      const raw = valueAfter(i);
      if (raw !== undefined) {
        const v = Number(raw);
        if (!Number.isNaN(v)) chunkLimit = v;
        i++; // consume the value token
      }
    } else if (flag === '--cache-dir') {
      const raw = valueAfter(i);
      if (raw !== undefined) {
        cacheDir = raw;
        i++; // consume the value token
      }
    } else if (flag === '--random-walk') {
      randomWalk = true;
    }
  }

  return { limit, chunkLimit, cacheDir, randomWalk };
}

/**
 * Render a fixed-width ASCII progress bar such as
 * `label [#####.....] 50.0%`.
 *
 * @param {string} label - Text printed before the bar.
 * @param {number} fraction - Fill ratio; clamped to [0, 1]. NaN is rendered
 *   as 0 so the output never contains "NaN%".
 * @param {number} [width=30] - Total number of bar cells.
 * @returns {string} The formatted bar line.
 */
function bar(label, fraction, width = 30) {
  const bounded = Math.max(0, Math.min(1, fraction));
  // Math.min/Math.max propagate NaN, so guard explicitly.
  const clamped = Number.isNaN(bounded) ? 0 : bounded;
  const filled = Math.round(clamped * width);
  const empty = width - filled;
  return `${label} [${'#'.repeat(filled)}${'.'.repeat(empty)}] ${(clamped * 100).toFixed(1)}%`;
}

/**
 * Run one benchmark pass: configure the environment for mock providers,
 * invoke `runPipelineBatch`, time it, and print a summary to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { limit, chunkLimit, cacheDir, randomWalk } = parseArgs(process.argv);

  // Force mock providers for speed and determinism
  process.env.GENERATOR_PROVIDER = 'mock';
  process.env.VERIFIER_PROVIDER = 'mock';
  process.env.REWARD_PROVIDER = 'mock';
  process.env.QUESTION_PROVIDER = 'mock';
  process.env.PROVIDER_TYPE = 'mock';

  // Seed mode: question-first avoids ES by using rag chunks JSONL
  process.env.PIPELINE_SEED_MODE = 'question-first';

  // Optional random walk over chunks
  if (randomWalk) process.env.PIPELINE_RANDOM_WALK = '1';

  // Isolate cache/output: default to fresh, timestamped paths under the OS
  // temp directory unless the caller supplied --cache-dir.
  const cachePath =
    cacheDir || path.join(os.tmpdir(), `distill-cache-bench-${Date.now()}`);
  process.env.PIPELINE_CACHE_DIR = cachePath;

  const outPath = path.join(
    os.tmpdir(),
    `pipeline_gold_bench_${Date.now()}.jsonl`,
  );

  console.log('🏎️ Benchmarking pipeline (mock providers)');
  console.log(` limit: ${limit}`);
  console.log(` chunkLimit: ${chunkLimit ?? 'default'}`);
  console.log(` randomWalk: ${randomWalk ? 'yes' : 'no'}`);
  console.log(` cache: ${cachePath}`);
  console.log(` out: ${outPath}`);
  console.log('');

  const start = performance.now();

  // Drop the pipeline's regular log output but keep errors visible.
  const silentLogger = { log: () => {}, error: console.error };

  const result = await runPipelineBatch({
    limit,
    chunkLimit,
    verbose: false,
    outPath,
    seedMode: 'question-first',
    logger: silentLogger,
  });

  const end = performance.now();
  const ms = end - start;

  // Guard the ratios so an empty run reports 0 instead of dividing by zero.
  const qps = result.processed > 0 ? (result.processed / ms) * 1000 : 0;
  const acceptRatio =
    result.processed > 0 ? result.accepted / result.processed : 0;

  console.log('🎯 Benchmark complete');
  console.log(` mode: ${result.mode}`);
  console.log(` processed: ${result.processed}`);
  console.log(` accepted: ${result.accepted}`);
  console.log(` duration: ${ms.toFixed(1)} ms`);
  console.log(` throughput: ${qps.toFixed(2)} q/s`);
  console.log(` ${bar('accept rate ', acceptRatio)}`);
  console.log(` ${bar('throughput ', Math.min(1, qps / 50))} (normalized vs 50 q/s)`);
  console.log(` cache dir: ${cachePath}`);
  console.log(` out file: ${outPath}`);
}

main().catch((err) => {
  console.error('Benchmark error:', err);
  process.exit(1);
});