biodsbench-adapter / src /harness /evaluation /taskEnvironment.test.ts
starpacker52's picture
Add files using upload-large-folder tool
2c2dc59 verified
Raw
History Blame Contribute Delete
6 kB
import { describe, expect, test } from 'bun:test'
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'
import { lstat, mkdir } from 'fs/promises'
import { tmpdir } from 'os'
import { dirname, isAbsolute, join, relative, sep } from 'path'
import { createTaskRun } from './taskEnvironment.js'
/**
 * Writes a minimal task prototype to disk under `<root>/<taskId>`.
 *
 * The fixture contains a handful of placeholder files, a `visible_data`
 * tree with one case, `evaluation/` and `std_code/` directories holding
 * placeholder judge/solution files, an empty `envs/` directory, and a
 * `task_manifest.json` whose `public_bundle` lists only README.md,
 * requirements.txt, output_schema.json, `visible_data/` and `envs/`.
 *
 * Note that the manifest's `entrypoints` mention `evaluation/metrics.json`
 * and `envs/env_manifest.json`, neither of which is created here.
 *
 * @param root   Directory that will contain the task directory.
 * @param taskId Name of the task directory; also stored as `task_id`.
 * @returns The path of the created task directory.
 */
async function makeTaskPrototype(root: string, taskId: string): Promise<string> {
  const taskDir = join(root, taskId)
  const inTask = (...segments: string[]): string => join(taskDir, ...segments)

  // Directories are created one after another, in a fixed order, before any file is written.
  const directoryLayout: string[][] = [
    ['visible_data'],
    ['visible_data', 'cases', 'case_000', 'input_data'],
    ['evaluation', 'data'],
    ['std_code'],
    ['envs'],
  ]
  for (const segments of directoryLayout) {
    await mkdir(inTask(...segments), { recursive: true })
  }

  // Key order matters: the object is serialised verbatim into task_manifest.json.
  const manifestText = JSON.stringify({
    version: 1,
    task_id: taskId,
    public_bundle: [
      'README.md',
      'requirements.txt',
      'output_schema.json',
      'visible_data/',
      'envs/',
    ],
    private_judge_bundle: ['evaluation/'],
    entrypoints: {
      judge: 'evaluation/judge.py',
      cases: 'visible_data/cases.json',
      output_schema: 'output_schema.json',
      metrics: 'evaluation/metrics.json',
      environment: 'envs/env_manifest.json',
    },
  })

  // [path segments relative to the task directory, file contents]
  const fileLayout: Array<[string[], string]> = [
    [['README.md'], 'public readme'],
    [['requirements.txt'], ''],
    [['output_schema.json'], '{"version":1}'],
    [['visible_data', 'cases.json'], '{"cases":[]}'],
    [
      ['visible_data', 'cases', 'case_000', 'input_data', 'positions.npy'],
      'public positions',
    ],
    [['evaluation', 'judge.py'], 'hidden judge'],
    [['std_code', 'main.py'], 'hidden solution'],
    [['task_manifest.json'], manifestText],
  ]
  for (const [segments, contents] of fileLayout) {
    writeFileSync(inTask(...segments), contents)
  }

  return taskDir
}
/**
 * Runs `body` with a freshly created temporary directory and removes that
 * directory afterwards, whether `body` resolves or rejects.
 *
 * @param prefix Name prefix passed to `mkdtempSync` inside the OS temp dir.
 * @param body   Async callback receiving the temporary directory path.
 */
async function withTempDir(
  prefix: string,
  body: (temp: string) => Promise<void>,
): Promise<void> {
  const temp = mkdtempSync(join(tmpdir(), prefix))
  try {
    await body(temp)
  } finally {
    rmSync(temp, { recursive: true, force: true })
  }
}

/**
 * Reports whether `candidate` is `parent` itself or lies underneath it.
 *
 * Works on path segments rather than raw string prefixes, so a sibling such
 * as `<parent>_judge` is not mistaken for a child of `<parent>`.
 *
 * @param parent    Directory that may contain `candidate`.
 * @param candidate Path to classify.
 * @returns `true` when `candidate` equals or is nested in `parent`.
 */
function isInsideDir(parent: string, candidate: string): boolean {
  const rel = relative(parent, candidate)
  if (rel === '') return true
  // An absolute result means the two paths share no common root (e.g. different drives).
  if (isAbsolute(rel)) return false
  return rel !== '..' && !rel.startsWith(`..${sep}`)
}

describe('createTaskRun', () => {
  test('instantiates public workspace without exposing evaluation or std_code', async () => {
    await withTempDir('task-env-', async (temp) => {
      const tasksDir = join(temp, 'tasks')
      const runsDir = join(temp, 'runs')
      await makeTaskPrototype(tasksDir, 'demo_task')

      const run = await createTaskRun({
        taskId: 'demo_task',
        tasksDir,
        runsDir,
        timestamp: '20260511_010203',
      })

      // Files listed in the prototype's public bundle are present in the public dir.
      expect(existsSync(join(run.publicDir, 'README.md'))).toBe(true)
      expect(existsSync(join(run.publicDir, 'visible_data', 'cases.json'))).toBe(
        true,
      )

      // Run directory scaffold.
      expect(existsSync(join(run.runDir, 'workspace'))).toBe(true)
      expect(existsSync(join(run.runDir, 'workspace', 'plans'))).toBe(true)
      expect(existsSync(join(run.runDir, 'workspace', 'experiments'))).toBe(true)
      expect(existsSync(join(run.runDir, 'outputs'))).toBe(true)
      expect(existsSync(join(run.runDir, 'logs', 'agent'))).toBe(true)

      // The judge directory exists, but neither as `<runDir>/judge` nor anywhere under runDir.
      expect(existsSync(join(run.runDir, 'judge'))).toBe(false)
      expect(existsSync(run.judgeDir)).toBe(true)
      expect(isInsideDir(run.runDir, run.judgeDir)).toBe(false)

      // Nothing outside the public bundle shows up in the public dir.
      expect(existsSync(join(run.publicDir, 'evaluation'))).toBe(false)
      expect(existsSync(join(run.publicDir, 'std_code'))).toBe(false)
      expect(existsSync(join(run.publicDir, 'HARNESS_HINTS.md'))).toBe(false)

      const manifest: {
        task_id?: unknown
        public_dir?: unknown
        workspace_dir?: unknown
      } = JSON.parse(readFileSync(join(run.runDir, 'run_manifest.json'), 'utf8'))
      expect(manifest.task_id).toBe('demo_task')
      expect(manifest.public_dir).toBe('public')
      expect(manifest.workspace_dir).toBe('workspace')
    })
  })

  test('accepts UTF-8 BOM in task_manifest.json', async () => {
    await withTempDir('task-env-bom-', async (temp) => {
      const tasksDir = join(temp, 'tasks')
      const runsDir = join(temp, 'runs')
      await makeTaskPrototype(tasksDir, 'bom_task')

      // Rewrite the manifest with a leading byte-order mark.
      const manifestPath = join(tasksDir, 'bom_task', 'task_manifest.json')
      const manifest = readFileSync(manifestPath, 'utf8')
      writeFileSync(manifestPath, `\ufeff${manifest}`, 'utf8')

      const run = await createTaskRun({
        taskId: 'bom_task',
        tasksDir,
        runsDir,
        timestamp: '20260511_040506',
      })

      expect(existsSync(join(run.publicDir, 'README.md'))).toBe(true)
    })
  })

  test('links host runtime venv instead of copying venv internals', async () => {
    await withTempDir('task-env-runtime-link-', async (temp) => {
      const tasksDir = join(temp, 'tasks')
      const runsDir = join(temp, 'runs')
      const taskDir = await makeTaskPrototype(tasksDir, 'runtime_link_task')

      // The venv directory name and interpreter location depend on the host platform.
      const isWindows = process.platform === 'win32'
      const venvName = isWindows ? '.venv' : '.venv-posix'
      const pythonRel = isWindows
        ? 'envs/runtime/.venv/Scripts/python.exe'
        : 'envs/runtime/.venv-posix/bin/python'
      const pythonSegments = pythonRel.split('/')

      // Create an empty placeholder file where the interpreter would live.
      const pythonAbs = join(taskDir, ...pythonSegments)
      await mkdir(dirname(pythonAbs), { recursive: true })
      writeFileSync(pythonAbs, '')

      writeFileSync(
        join(taskDir, 'envs', 'env_manifest.json'),
        JSON.stringify({
          version: 1,
          default_env: 'runtime',
          envs: {
            runtime: {
              python: {
                [isWindows ? 'windows' : 'posix']: pythonRel,
              },
            },
          },
        }),
      )

      const run = await createTaskRun({
        taskId: 'runtime_link_task',
        tasksDir,
        runsDir,
        timestamp: '20260511_070809',
      })

      // The interpreter is reachable through the public dir, and the venv entry itself is a link.
      const linkedVenv = join(run.publicDir, 'envs', 'runtime', venvName)
      expect(existsSync(join(run.publicDir, ...pythonSegments))).toBe(true)
      expect((await lstat(linkedVenv)).isSymbolicLink()).toBe(true)
    })
  })
})