---
# Environment manifest for the "codearena-rl-benchmark" RL environment.
#
# NOTE(review): this file was recovered from a rendered page in which every
# line was wrapped in table pipes and all indentation was lost. The nesting
# below is reconstructed from the key names; keys and values are unchanged.
# Confirm the hierarchy against the consumer's schema.
# NOTE(review): the original capture started with the stray lines
# "Spaces: / Sleeping / Sleeping", which look like page-status residue rather
# than manifest data; they are kept here only as this comment.

name: codearena-rl-benchmark
description: "RL Benchmark for Autonomous Code Repair — iterative debugging with execution feedback"
# Quoted so the version is always read as a string.
version: "1.0.0"
# "module:attribute" style reference; presumably resolved by the loader.
entrypoint: server.app:CodeArenaEnv

runtime:
  language: python
  # Quoted so it is never parsed as the float 3.11.
  python_version: "3.11"

# Route paths for the three environment operations.
api:
  reset: /reset
  step: /step
  state: /state

# Fields the environment reports, with their declared types.
observation_space:
  type: json
  schema:
    buggy_code: string
    error_log: string
    test_results: string
    previous_attempts: list[string]

# Fields expected in an action, with their declared types.
action_space:
  type: json
  schema:
    proposed_fix: string

# Each task has an id, a path to its JSON definition, and a grader reference.
# All tasks point at the same grader, server.grader:grade.
tasks:
  - id: easy
    path: tasks/easy.json
    grader: server.grader:grade
  - id: medium
    path: tasks/medium.json
    grader: server.grader:grade
  - id: hard
    path: tasks/hard.json
    grader: server.grader:grade
  - id: type_errors
    path: tasks/type_errors/type_error_1.json
    grader: server.grader:grade
  - id: security_bugs
    path: tasks/security_bugs/security_bug_1.json
    grader: server.grader:grade

# Limits; units are given by the key suffixes (seconds / minutes).
limits:
  step_timeout_seconds: 2
  max_runtime_minutes: 20