File size: 991 Bytes
9967cb5
 
 
a448db8
9967cb5
 
 
 
 
 
 
 
 
 
a448db8
 
 
 
 
 
 
 
 
 
 
 
 
9967cb5
 
 
 
 
 
 
 
 
 
a448db8
 
 
 
 
 
9967cb5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Manifest header: identifies the benchmark and names its entry point.
name: codearena-rl-benchmark
description: "RL Benchmark for Autonomous Code Repair — iterative debugging with execution feedback"
# Quoted, so the version is always read as a string.
version: "1.0.0"
# Written as `<dotted.path>:<Name>`. Presumably module `server.app` and the
# object `CodeArenaEnv` inside it — confirm against the loader of this file.
entrypoint: server.app:CodeArenaEnv

# Language and interpreter version declared for this environment.
runtime:
  language: python
  # Keep the quotes: a bare 3.11 would be parsed as a float, not a string.
  python_version: "3.11"

# Maps the reset / step / state operations to paths.
# NOTE(review): presumably HTTP routes served by the entry point; methods and
# payload formats are not specified in this file — confirm against the server.
api:
  reset: /reset
  step: /step
  state: /state

# Observation format: declared as JSON with four named fields.
observation_space:
  type: json
  schema:
    # Each value is a type name stored as a plain string; YAML attaches no
    # meaning to it (`list[string]` is an ordinary scalar, not a flow sequence).
    # NOTE(review): field meanings are only implied by their names — confirm
    # against the code that builds observations.
    buggy_code: string
    error_log: string
    test_results: string
    previous_attempts: list[string]

# Action format: declared as JSON with a single string field.
action_space:
  type: json
  schema:
    # NOTE(review): presumably the candidate repaired code submitted on each
    # step — confirm against the step handler.
    proposed_fix: string

# Task registry. Every entry has the same three keys:
#   id     - identifier of the task
#   path   - relative path to a JSON file
#   grader - `<dotted.path>:<name>` reference; all entries use the same one
tasks:
  - id: easy
    path: tasks/easy.json
    grader: server.grader:grade
  - id: medium
    path: tasks/medium.json
    grader: server.grader:grade
  - id: hard
    path: tasks/hard.json
    grader: server.grader:grade
  # The two entries below point at one file inside a per-category directory.
  # NOTE(review): both file names end in `_1`; whether further files in those
  # directories should be listed cannot be determined from this manifest.
  - id: type_errors
    path: tasks/type_errors/type_error_1.json
    grader: server.grader:grade
  - id: security_bugs
    path: tasks/security_bugs/security_bug_1.json
    grader: server.grader:grade

# Resource limits. Units are carried in the key names (seconds vs. minutes).
limits:
  # NOTE(review): presumably the wall-clock cap for a single step — confirm
  # how the server enforces it and that 2 seconds is intentional.
  step_timeout_seconds: 2
  # NOTE(review): scope (per episode vs. whole run) is not stated here.
  max_runtime_minutes: 20