File size: 3,317 Bytes
01620c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52fe477
01620c1
db07239
 
01620c1
 
5a87b28
5fcb94c
a594b6e
db07239
 
01620c1
 
5a87b28
5fcb94c
a594b6e
db07239
 
01620c1
 
5a87b28
5fcb94c
a594b6e
52fe477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# OpenEnv environment manifest.
# This file is required by `openenv validate`.

name: codesensei
# Quoted so the version is always read as a string.
version: '1.0.0'
# Folded scalar: the two lines below are joined with a single space and the
# trailing newline is stripped, yielding one single-line string.
description: >-
  GRPO-trained LLM code debugging environment — teaches models to fix
  Python bugs

# Environment implementation: the class name and the module it is declared in.
environment:
  class: 'CodeDebugEnvironment'
  module: 'env.server.environment'

# Typed models: for each model, the module/class that declares it and the
# fields it exposes (field name -> type, plus a description where provided).
models:
  # Action model
  action:
    module: env.models
    class: CodeDebugAction
    fields:
      proposed_fix:
        type: string
        description: 'The corrected Python function source code'
      session_id:
        type: string
        description: 'Session identifier for the episode'

  # Observation model
  observation:
    module: env.models
    class: CodeDebugObservation
    fields:
      # Code snapshots
      buggy_code:
        type: string
        description: 'The original buggy Python function'
      current_code:
        type: string
        description: 'Current version of the code after applying fix'
      # Execution / test results
      error_output:
        type: string
        description: 'Stderr/exception output from execution'
      tests_passed:
        type: integer
        description: 'Number of tests that passed'
      tests_total:
        type: integer
        description: 'Total number of tests'
      # Step outcome
      reward:
        type: float
        description: 'Aggregated reward signal for this step'
      done:
        type: boolean
        description: 'Whether the episode is complete'
      feedback:
        type: string
        description: 'Human-readable feedback for the LLM'

  # State model (its fields declare a type only, no description)
  state:
    module: env.models
    class: CodeDebugState
    fields:
      episode_id: {type: string}
      session_id: {type: string}
      attempt: {type: integer}
      solved: {type: boolean}

# API endpoints: request method and path for each operation.
endpoints:
  reset: {method: POST, path: /reset}
  step: {method: POST, path: /step}
  state: {method: GET, path: /state}

# Server settings: framework, host, and port.
server:
  framework: fastapi
  # Quoted so the address is always read as a string.
  host: '0.0.0.0'
  port: 7860

# Tasks and their graders.
# Six tasks are declared: three real code-debugging tasks followed by three
# dummy tasks, included so that platform validation succeeds.
tasks:
  # --- Real code-debugging tasks ---
  - id: debug-add_numbers
    name: 'debug-add_numbers'
    description: 'Fix subtraction → addition bug'
    difficulty: easy
    max_steps: 6
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'
  - id: debug-find_max
    name: 'debug-find_max'
    description: 'Fix < → > comparison bug'
    difficulty: easy
    max_steps: 6
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'
  - id: debug-reverse_string
    name: 'debug-reverse_string'
    description: 'Fix slice → reverse bug'
    difficulty: easy
    max_steps: 6
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'

  # --- Dummy validation tasks ---
  - id: dummy-task-alpha
    name: 'Standard Debug Alpha'
    description: 'Baseline validation task for model compliance'
    difficulty: easy
    max_steps: 3
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'
  - id: dummy-task-beta
    name: 'Standard Debug Beta'
    description: 'Secondary validation task for model compliance'
    difficulty: easy
    max_steps: 3
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'
  - id: dummy-task-gamma
    name: 'Standard Debug Gamma'
    description: 'Tertiary validation task for model compliance'
    difficulty: easy
    max_steps: 3
    reward_range: [0.01, 0.99]
    grader: 'tasks.grader:grade'