// Unit tests for the evaluation CLI helpers exported by ./cli.js:
// argument parsing (parseEvaluationCliArgs) and the mapping from a loop
// status to a process exit code (exitCodeForLoopStatus).
import { describe, expect, test } from 'bun:test'
import { exitCodeForLoopStatus, parseEvaluationCliArgs } from './cli.js'

describe('parseEvaluationCliArgs', () => {
  // NOTE(review): the title says "requires task id", but only the happy path
  // is asserted here; the missing --task rejection is not covered by this test.
  test('requires task id and parses loop controls', () => {
    const parsed = parseEvaluationCliArgs([
      '--task',
      'demo_task',
      '--max-rounds',
      '4',
      '--timeout-seconds',
      '120',
      '--concurrency',
      '3',
      '--worker-timeout-grace-seconds',
      '15',
      '--max-turns-per-round',
      '9',
      '--agent-runtime',
      'source',
      '--runs-dir',
      'output/runs',
    ])

    // Numeric flags arrive as strings and are expected back as numbers.
    expect(parsed.taskId).toBe('demo_task')
    expect(parsed.maxRounds).toBe(4)
    expect(parsed.maxTurnsPerRound).toBe(9)
    expect(parsed.timeoutSeconds).toBe(120)
    expect(parsed.concurrency).toBe(3)
    expect(parsed.workerTimeoutGraceSeconds).toBe(15)
    expect(parsed.runsDir).toBe('output/runs')
    expect(parsed.agentRuntime).toBe('source')
    // --temperature and --thinking were not passed: these are the defaults.
    expect(parsed.temperature).toBe(1)
    expect(parsed.thinking).toBe('disabled')
    // A single --task still populates the list form.
    expect(parsed.taskIds).toEqual(['demo_task'])
    expect(parsed.systemPromptPath).toBeUndefined()
  })

  test('parses repeated tasks for source batch mode', () => {
    const parsed = parseEvaluationCliArgs([
      '--task',
      'task_a,task_b',
      '--task',
      'task_c',
      '--quiet',
    ])

    // Comma-separated and repeated --task values are flattened in order;
    // taskId is the first entry of that list.
    expect(parsed.taskId).toBe('task_a')
    expect(parsed.taskIds).toEqual(['task_a', 'task_b', 'task_c'])
    // --max-turns-per-round was omitted, so no value is set.
    expect(parsed.maxTurnsPerRound).toBeUndefined()
    // --quiet turns verbose output off.
    expect(parsed.verbose).toBe(false)
  })

  test('rejects removed legacy subprocess runtime', () => {
    // The thrown message must contain the given substring.
    expect(() =>
      parseEvaluationCliArgs(['--task', 'demo_task', '--agent-runtime', 'legacy-subprocess']),
    ).toThrow('legacy-subprocess has been removed')
  })

  test('parses temperature and thinking controls', () => {
    const parsed = parseEvaluationCliArgs([
      '--task',
      'demo_task',
      '--temperature',
      '0.2',
      '--thinking',
      'adaptive',
    ])

    expect(parsed.temperature).toBe(0.2)
    expect(parsed.thinking).toBe('adaptive')
  })

  test('rejects invalid temperature and thinking values', () => {
    // Non-numeric temperature.
    expect(() =>
      parseEvaluationCliArgs(['--task', 'demo_task', '--temperature', 'hot']),
    ).toThrow('--temperature')
    // Numeric but still rejected — presumably outside the accepted range;
    // confirm the exact bounds against ./cli.js.
    expect(() =>
      parseEvaluationCliArgs(['--task', 'demo_task', '--temperature', '2']),
    ).toThrow('--temperature')
    // 'enabled' is not an accepted --thinking value.
    expect(() =>
      parseEvaluationCliArgs(['--task', 'demo_task', '--thinking', 'enabled']),
    ).toThrow('--thinking')
  })
})

// Kept in its own suite so the test report names the function under test
// instead of listing it under parseEvaluationCliArgs.
describe('exitCodeForLoopStatus', () => {
  test('maps loop status to process exit code', () => {
    // Only 'success' maps to 0; every other status checked here maps to 1.
    expect(exitCodeForLoopStatus('success')).toBe(0)
    expect(exitCodeForLoopStatus('failed')).toBe(1)
    expect(exitCodeForLoopStatus('timeout')).toBe(1)
    expect(exitCodeForLoopStatus('infra_error')).toBe(1)
  })
})