# codearena-rl / openenv.yaml
# Provenance: Hugging Face repo file (uploader: havinashpatil, commit a448db8)
# Commit message: "Complete all tasks: Adaptive curriculum, GRPO, React frontend, LLM-as-a-judge"
---
# OpenEnv manifest for the CodeArena RL benchmark: an iterative code-repair
# environment where an agent proposes fixes and receives execution feedback.
name: codearena-rl-benchmark
description: "RL Benchmark for Autonomous Code Repair — iterative debugging with execution feedback"
# Quoted so the version stays a string, not a float/garbage scalar.
version: "1.0.0"
# module:attribute path to the environment class served by the runtime.
entrypoint: server.app:CodeArenaEnv

runtime:
  language: python
  # Quoted: unquoted 3.11 would parse as the float 3.11 (and 3.10 -> 3.1).
  python_version: "3.11"

# HTTP routes exposed by the environment server.
api:
  reset: /reset
  step: /step
  state: /state

# What the agent observes each step.
observation_space:
  type: json
  schema:
    buggy_code: string
    error_log: string
    test_results: string
    previous_attempts: list[string]

# What the agent submits each step.
action_space:
  type: json
  schema:
    proposed_fix: string

# Task tiers; each entry points at a task definition file and the grader
# callable (module:function) used to score submissions.
tasks:
  - id: easy
    path: tasks/easy.json
    grader: server.grader:grade
  - id: medium
    path: tasks/medium.json
    grader: server.grader:grade
  - id: hard
    path: tasks/hard.json
    grader: server.grader:grade
  - id: type_errors
    path: tasks/type_errors/type_error_1.json
    grader: server.grader:grade
  - id: security_bugs
    path: tasks/security_bugs/security_bug_1.json
    grader: server.grader:grade

# Resource caps enforced by the runner.
limits:
  step_timeout_seconds: 2
  max_runtime_minutes: 20