starpacker52's picture
Add files using upload-large-folder tool
57c9ddf verified
Raw
History Blame Contribute Delete
4.17 kB
export type EvaluationWorkerRequest = {
taskId: string
command: string
args: string[]
timeoutMs?: number
}
export type EvaluationWorkerResult = {
taskId: string
exitCode: number
}
export type SpawnEvaluationWorker = (
request: EvaluationWorkerRequest,
) => Promise<EvaluationWorkerResult>
export type RunEvaluationBatchInput = {
taskIds: string[]
tasksDir: string
runsDir: string
maxRounds: number
maxTurnsPerRound?: number
timeoutSeconds: number
temperature: number
thinking: 'disabled' | 'adaptive'
systemPromptPath?: string
timestamp?: string
concurrency?: number
workerTimeoutGraceSeconds?: number
verbose: boolean
spawnWorker?: SpawnEvaluationWorker
}
export type RunEvaluationBatchResult = {
ok: boolean
workers: EvaluationWorkerResult[]
}
function pushOption(args: string[], name: string, value: string | number | undefined): void {
if (value === undefined) return
args.push(name, String(value))
}
function workerArgs(input: RunEvaluationBatchInput, taskId: string): string[] {
const args = [
'src/harness/evaluation/cli.ts',
'--worker-run',
'--agent-runtime',
'source',
'--task',
taskId,
]
pushOption(args, '--tasks-dir', input.tasksDir)
pushOption(args, '--runs-dir', input.runsDir)
pushOption(args, '--max-rounds', input.maxRounds)
pushOption(args, '--max-turns-per-round', input.maxTurnsPerRound)
pushOption(args, '--timeout-seconds', input.timeoutSeconds)
pushOption(args, '--temperature', input.temperature)
pushOption(args, '--thinking', input.thinking)
pushOption(args, '--system-prompt', input.systemPromptPath)
pushOption(args, '--timestamp', input.timestamp)
if (!input.verbose) args.push('--quiet')
return args
}
const defaultSpawnWorker: SpawnEvaluationWorker = request => {
const child = Bun.spawn([request.command, ...request.args], {
stdout: 'inherit',
stderr: 'inherit',
env: process.env,
})
const exited = child.exited.then(exitCode => ({
taskId: request.taskId,
exitCode,
}))
if (!request.timeoutMs) return exited
let timedOut = false
let timeoutTimer: ReturnType<typeof setTimeout> | undefined
let killTimer: ReturnType<typeof setTimeout> | undefined
const timeout = new Promise<EvaluationWorkerResult>(resolve => {
timeoutTimer = setTimeout(() => {
timedOut = true
child.kill('SIGTERM')
killTimer = setTimeout(() => child.kill('SIGKILL'), 5000)
killTimer.unref?.()
resolve({ taskId: request.taskId, exitCode: 124 })
}, request.timeoutMs)
timeoutTimer.unref?.()
})
void child.exited.finally(() => {
if (killTimer) clearTimeout(killTimer)
})
return Promise.race([exited, timeout]).finally(() => {
if (timeoutTimer) clearTimeout(timeoutTimer)
if (!timedOut && killTimer) clearTimeout(killTimer)
})
}
export async function runEvaluationBatch(
input: RunEvaluationBatchInput,
): Promise<RunEvaluationBatchResult> {
const spawnWorker = input.spawnWorker ?? defaultSpawnWorker
const command = process.execPath
const concurrency = Math.max(1, input.concurrency ?? 3)
const timeoutMs =
(input.timeoutSeconds + (input.workerTimeoutGraceSeconds ?? 60)) * 1000
const workers: Array<EvaluationWorkerResult | undefined> = new Array(
input.taskIds.length,
)
let nextIndex = 0
async function runLane(): Promise<void> {
for (;;) {
const index = nextIndex
nextIndex++
if (index >= input.taskIds.length) return
const taskId = input.taskIds[index]
try {
workers[index] = await spawnWorker({
taskId,
command,
args: workerArgs(input, taskId),
timeoutMs,
})
} catch {
workers[index] = { taskId, exitCode: 1 }
}
}
}
const laneCount = Math.min(concurrency, input.taskIds.length)
await Promise.all(Array.from({ length: laneCount }, () => runLane()))
const completedWorkers = workers.map((worker, index) =>
worker ?? {
taskId: input.taskIds[index],
exitCode: 1,
},
)
return {
ok: completedWorkers.every(worker => worker.exitCode === 0),
workers: completedWorkers,
}
}