starpacker52's picture
Add files using upload-large-folder tool
57c9ddf verified
Raw
History Blame Contribute Delete
2.82 kB
import { describe, expect, test } from 'bun:test'
import { exitCodeForLoopStatus, parseEvaluationCliArgs } from './cli.js'
describe('parseEvaluationCliArgs', () => {
test('requires task id and parses loop controls', () => {
const parsed = parseEvaluationCliArgs([
'--task',
'demo_task',
'--max-rounds',
'4',
'--timeout-seconds',
'120',
'--concurrency',
'3',
'--worker-timeout-grace-seconds',
'15',
'--max-turns-per-round',
'9',
'--agent-runtime',
'source',
'--runs-dir',
'output/runs',
])
expect(parsed.taskId).toBe('demo_task')
expect(parsed.maxRounds).toBe(4)
expect(parsed.maxTurnsPerRound).toBe(9)
expect(parsed.timeoutSeconds).toBe(120)
expect(parsed.concurrency).toBe(3)
expect(parsed.workerTimeoutGraceSeconds).toBe(15)
expect(parsed.runsDir).toBe('output/runs')
expect(parsed.agentRuntime).toBe('source')
expect(parsed.temperature).toBe(1)
expect(parsed.thinking).toBe('disabled')
expect(parsed.taskIds).toEqual(['demo_task'])
expect(parsed.systemPromptPath).toBeUndefined()
})
test('parses repeated tasks for source batch mode', () => {
const parsed = parseEvaluationCliArgs([
'--task',
'task_a,task_b',
'--task',
'task_c',
'--quiet',
])
expect(parsed.taskId).toBe('task_a')
expect(parsed.taskIds).toEqual(['task_a', 'task_b', 'task_c'])
expect(parsed.maxTurnsPerRound).toBeUndefined()
expect(parsed.verbose).toBe(false)
})
test('rejects removed legacy subprocess runtime', () => {
expect(() =>
parseEvaluationCliArgs(['--task', 'demo_task', '--agent-runtime', 'legacy-subprocess']),
).toThrow('legacy-subprocess has been removed')
})
test('parses temperature and thinking controls', () => {
const parsed = parseEvaluationCliArgs([
'--task',
'demo_task',
'--temperature',
'0.2',
'--thinking',
'adaptive',
])
expect(parsed.temperature).toBe(0.2)
expect(parsed.thinking).toBe('adaptive')
})
test('rejects invalid temperature and thinking values', () => {
expect(() =>
parseEvaluationCliArgs(['--task', 'demo_task', '--temperature', 'hot']),
).toThrow('--temperature')
expect(() =>
parseEvaluationCliArgs(['--task', 'demo_task', '--temperature', '2']),
).toThrow('--temperature')
expect(() =>
parseEvaluationCliArgs(['--task', 'demo_task', '--thinking', 'enabled']),
).toThrow('--thinking')
})
test('maps loop status to process exit code', () => {
expect(exitCodeForLoopStatus('success')).toBe(0)
expect(exitCodeForLoopStatus('failed')).toBe(1)
expect(exitCodeForLoopStatus('timeout')).toBe(1)
expect(exitCodeForLoopStatus('infra_error')).toBe(1)
})
})