# Page header captured with this file: Spaces status "Sleeping"; file size: 991 Bytes.
# Environment manifest for the CodeArena RL benchmark.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm it against the schema of the tool that consumes it.

# Identity and entrypoint.
name: codearena-rl-benchmark
description: "RL Benchmark for Autonomous Code Repair — iterative debugging with execution feedback"
version: "1.0.0"
# Presumably a `module.path:ClassName` reference — verify against the loader.
entrypoint: server.app:CodeArenaEnv

# Interpreter settings; the version is quoted so it stays a string.
runtime:
  language: python
  python_version: "3.11"

# Paths for the reset / step / state operations.
api:
  reset: /reset
  step: /step
  state: /state

# Fields listed for an observation, each with its declared type.
observation_space:
  type: json
  schema:
    buggy_code: string
    error_log: string
    test_results: string
    previous_attempts: list[string]

# Fields listed for an action.
action_space:
  type: json
  schema:
    proposed_fix: string

# Task entries: each has an id, a task file path, and a grader reference.
# Every entry points at the same grader, server.grader:grade.
tasks:
  - id: easy
    path: tasks/easy.json
    grader: server.grader:grade
  - id: medium
    path: tasks/medium.json
    grader: server.grader:grade
  - id: hard
    path: tasks/hard.json
    grader: server.grader:grade
  - id: type_errors
    path: tasks/type_errors/type_error_1.json
    grader: server.grader:grade
  - id: security_bugs
    path: tasks/security_bugs/security_bug_1.json
    grader: server.grader:grade

# Limits; the units are given by the key names (seconds, minutes).
limits:
  step_timeout_seconds: 2
  max_runtime_minutes: 20