import { describe, expect, test } from 'bun:test'
import { runEvaluationBatch, type SpawnEvaluationWorker } from './batchRunner.js'

/**
 * Tests for `runEvaluationBatch`.
 *
 * Each test passes a stub `spawnWorker` that records the requests it receives
 * and returns a canned result, then asserts on what was requested and on the
 * aggregated batch result.
 */
describe('runEvaluationBatch', () => {
  // One spawn request is expected per task id, carrying the batch options as CLI-style args.
  test('spawns one worker process per task in source batch mode', async () => {
    const spawned: Array<{ command: string; args: string[]; timeoutMs?: number }> = []
    const spawnWorker: SpawnEvaluationWorker = async request => {
      spawned.push({
        command: request.command,
        args: request.args,
        timeoutMs: request.timeoutMs,
      })
      return { taskId: request.taskId, exitCode: 0 }
    }

    const result = await runEvaluationBatch({
      taskIds: ['task_a', 'task_b'],
      tasksDir: 'tasks',
      runsDir: 'output/runs',
      maxRounds: 2,
      maxTurnsPerRound: 9,
      timeoutSeconds: 120,
      concurrency: 3,
      workerTimeoutGraceSeconds: 10,
      temperature: 0.2,
      thinking: 'adaptive',
      timestamp: '20260513_010203',
      systemPromptPath: 'config/debug-prompt.md',
      verbose: false,
      spawnWorker,
    })

    expect(result.ok).toBe(true)
    expect(spawned).toHaveLength(2)

    // Only membership in `args` is checked; the position of each flag/value is not.
    expect(spawned[0].args).toContain('--worker-run')
    expect(spawned[0].args).toContain('--task')
    expect(spawned[0].args).toContain('task_a')
    expect(spawned[0].args).toContain('--max-turns-per-round')
    expect(spawned[0].args).toContain('9')
    expect(spawned[0].args).toContain('--temperature')
    expect(spawned[0].args).toContain('0.2')
    expect(spawned[0].args).toContain('--thinking')
    expect(spawned[0].args).toContain('adaptive')

    // 120 s timeout + 10 s grace, expressed in milliseconds.
    expect(spawned[0].timeoutMs).toBe(130000)
    expect(spawned[1].args).toContain('task_b')
  })

  // With concurrency 3 and five tasks, a new worker should start each time one finishes.
  test('runs workers as a fixed-size pipeline', async () => {
    const started: string[] = []
    // Maps a task id to the function that settles that task's pending worker promise.
    const resolvers = new Map<string, (exitCode: number) => void>()
    const spawnWorker: SpawnEvaluationWorker = request => {
      started.push(request.taskId)
      // The worker stays pending until the test resolves it explicitly by task id.
      return new Promise(resolve => {
        resolvers.set(request.taskId, exitCode =>
          resolve({ taskId: request.taskId, exitCode }),
        )
      })
    }

    // Deliberately not awaited yet: the batch must stay in flight while workers are released.
    const running = runEvaluationBatch({
      taskIds: ['a', 'b', 'c', 'd', 'e'],
      tasksDir: 'tasks',
      runsDir: 'output/runs',
      maxRounds: 1,
      timeoutSeconds: 120,
      concurrency: 3,
      temperature: 1,
      thinking: 'disabled',
      verbose: false,
      spawnWorker,
    })

    // Yield one microtask tick before inspecting which workers have been started.
    await Promise.resolve()
    expect(started).toEqual(['a', 'b', 'c'])

    // Finish 'b' before 'a' to show that slots are refilled on completion, not in task order.
    resolvers.get('b')?.(0)
    await Promise.resolve()
    expect(started).toEqual(['a', 'b', 'c', 'd'])

    resolvers.get('a')?.(0)
    await Promise.resolve()
    expect(started).toEqual(['a', 'b', 'c', 'd', 'e'])

    for (const taskId of ['c', 'd', 'e']) {
      resolvers.get(taskId)?.(0)
    }

    const result = await running
    expect(result.ok).toBe(true)
    // Results are expected in `taskIds` order even though 'b' completed first.
    expect(result.workers.map(worker => worker.taskId)).toEqual(['a', 'b', 'c', 'd', 'e'])
  })

  // A non-zero exit code from one worker must not stop the remaining tasks from running.
  test('continues launching queued workers after a worker fails', async () => {
    const started: string[] = []
    const spawnWorker: SpawnEvaluationWorker = async request => {
      started.push(request.taskId)
      // Only task 'a' reports failure.
      return { taskId: request.taskId, exitCode: request.taskId === 'a' ? 1 : 0 }
    }

    const result = await runEvaluationBatch({
      taskIds: ['a', 'b', 'c'],
      tasksDir: 'tasks',
      runsDir: 'output/runs',
      maxRounds: 1,
      timeoutSeconds: 120,
      concurrency: 1,
      temperature: 1,
      thinking: 'disabled',
      verbose: false,
      spawnWorker,
    })

    expect(started).toEqual(['a', 'b', 'c'])
    expect(result.ok).toBe(false)
    expect(result.workers.map(worker => worker.exitCode)).toEqual([1, 0, 0])
  })

  // The timeout handed to the worker is expected to include the configured grace period.
  test('passes worker watchdog timeout to spawned workers', async () => {
    const timeouts: Array<number | undefined> = []
    const spawnWorker: SpawnEvaluationWorker = async request => {
      timeouts.push(request.timeoutMs)
      return { taskId: request.taskId, exitCode: 0 }
    }

    await runEvaluationBatch({
      taskIds: ['task_a'],
      tasksDir: 'tasks',
      runsDir: 'output/runs',
      maxRounds: 1,
      timeoutSeconds: 10,
      workerTimeoutGraceSeconds: 7,
      concurrency: 3,
      temperature: 1,
      thinking: 'disabled',
      verbose: false,
      spawnWorker,
    })

    // 10 s timeout + 7 s grace, expressed in milliseconds.
    expect(timeouts).toEqual([17000])
  })
})