Sneha Rudra committed on
Commit
1e3b07a
·
0 Parent(s):

Initial commit: Code Debugging Challenge OpenEnv environment

Browse files
.gitignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+ .env
25
+ .venv
26
+ env/
27
+ venv/
README.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Code Debugging Challenge
3
+ emoji: 🐛
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ tags:
10
+ - openenv
11
+ - reinforcement-learning
12
+ - code-debugging
13
+ - agentic-ai
14
+ ---
15
+
16
+ # 🐛 Code Debugging Challenge - OpenEnv Environment
17
+
18
+ A production-ready OpenEnv environment where AI agents learn to debug Python code.
19
+
20
+ ## 🎯 Overview
21
+
22
+ This environment challenges AI agents to identify and fix bugs in Python code snippets using the official **OpenEnv framework** from Meta-PyTorch and Hugging Face.
23
+
24
+ **Key Features:**
25
+ - ✅ Built with official OpenEnv library
26
+ - ✅ WebSocket-based client-server architecture
27
+ - ✅ Docker containerized for isolation
28
+ - ✅ Compatible with TRL, Torchforge, and other RL frameworks
29
+ - ✅ Production-ready with proper session management
30
+
31
+ ## 🏗️ Environment Details
32
+
33
+ - **Action Space**: 4 discrete actions (analyze, fix, test, submit)
34
+ - **Observation Space**: Structured observations with code, errors, and feedback
35
+ - **Reward Structure**:
36
+ - +1.0 for successful fix
37
+ - -0.2 to -0.5 for failed attempts
38
+ - +0.1 for analysis actions
39
+ - -1.0 for premature submission
40
+ - **Episode Length**: Max 5 attempts per bug
41
+
42
+ ## 🐞 Bug Types Included
43
+
44
+ 1. **Argument Count Errors** - Wrong number of function arguments
45
+ 2. **Logic Errors** - Incorrect loop variables and conditions
46
+ 3. **Exception Handling** - Missing error handling for edge cases
47
+ 4. **Index Errors** - Array/string index out of bounds
48
+ 5. **Infinite Recursion** - Recursive calls without base case reduction
49
+ 6. **Type Errors** - String/integer concatenation issues
50
+ 7. **Key Errors** - Missing dictionary keys
51
+
52
+ ## 🚀 Quick Start
53
+
54
+ ### Using Docker (Recommended)
55
+
56
+ ```python
57
+ from code_debug_env.client import DebugEnv
58
+
59
+ # Automatically starts Docker container and connects
60
+ env = DebugEnv.from_hub("openenv/code-debug-env")
61
+
62
+ # Reset to get first challenge
63
+ result = env.reset()
64
+ print(result.observation.buggy_code)
65
+ print(f"Expected output: {result.observation.expected_output}")
66
+
67
+ # Take action
68
+ from code_debug_env.models import DebugAction
69
+ action = DebugAction(action_type="test")
70
+ result = env.step(action)
71
+ print(f"Reward: {result.reward}")
72
+
73
+ # Cleanup
74
+ env.close()
75
+ ```
76
+
77
+ ## 🔧 Integration with RL Frameworks
78
+
79
+ ### With TRL (Transformer Reinforcement Learning)
80
+
81
+ ```python
82
+ from trl import OnlineDPOConfig, OnlineDPOTrainer
83
+ from code_debug_env.client import DebugEnv
84
+
85
+ config = OnlineDPOConfig(...)
86
+ trainer = OnlineDPOTrainer(
87
+ config=config,
88
+ env=DebugEnv.from_hub("openenv/code-debug-env"),
89
+ # ... other args
90
+ )
91
+ trainer.train()
92
+ ```
93
+
94
+ ## 🏆 OpenEnv Challenge Submission
95
+
96
+ This environment is submitted to the **OpenEnv Challenge: SOTA Environments to Drive General Intelligence** (UC Berkeley AgentBeats Competition).
97
+
98
+ ## 📜 License
99
+
100
+ Apache 2.0
client.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WebSocket client for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from openenv.core.env_client import EnvClient
6
+ from openenv.core.client_types import StepResult, ResetResult
7
+ from .models import DebugAction, DebugObservation, DebugState
8
+
9
+
10
class DebugEnv(EnvClient[DebugAction, DebugObservation, DebugState]):
    """Typed client for the Code Debugging Challenge environment server."""

    def _step_payload(self, action: DebugAction) -> dict:
        """Serialize an action into the JSON body sent with a step request."""
        return {"action_type": action.action_type, "content": action.content}

    def _parse_result(self, data: dict) -> StepResult[DebugObservation]:
        """Turn a raw step response into a typed StepResult."""
        raw = data["observation"]
        # attempts_remaining and the code fields are mandatory in a step
        # response; the rest are optional and fall back to their defaults.
        obs = DebugObservation(
            buggy_code=raw["buggy_code"],
            expected_output=raw["expected_output"],
            test_inputs=raw.get("test_inputs", []),
            current_output=raw.get("current_output"),
            error_message=raw.get("error_message"),
            attempts_remaining=raw["attempts_remaining"],
            hint=raw.get("hint"),
            success=raw.get("success", False),
        )
        return StepResult(
            observation=obs,
            reward=data["reward"],
            terminated=data["terminated"],
            truncated=data["truncated"],
            info=data.get("info", {}),
        )

    def _parse_reset_result(self, data: dict) -> ResetResult[DebugObservation]:
        """Turn a raw reset response into a typed ResetResult."""
        raw = data["observation"]
        # A reset observation never carries outputs/errors/hints; success is
        # always False at the start of an episode.
        obs = DebugObservation(
            buggy_code=raw["buggy_code"],
            expected_output=raw["expected_output"],
            test_inputs=raw.get("test_inputs", []),
            attempts_remaining=raw.get("attempts_remaining", 5),
            success=False,
        )
        return ResetResult(observation=obs, info=data.get("info", {}))

    def _parse_state(self, data: dict) -> DebugState:
        """Turn a raw state response into a typed DebugState."""
        defaults = {
            "current_problem_index": 0,
            "attempts_made": 0,
            "max_attempts": 5,
            "score": 0.0,
            "solved": False,
            "total_problems": 7,
            "episode_id": "",
        }
        return DebugState(
            **{key: data.get(key, fallback) for key, fallback in defaults.items()}
        )
models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data models for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional, Literal
7
+ from openenv.core.env_server import Action, Observation, State
8
+
9
+
10
+ @dataclass
11
+ class DebugAction(Action):
12
+ """Actions the agent can take in the debugging environment."""
13
+
14
+ action_type: Literal["analyze", "fix", "test", "submit"]
15
+ content: Optional[str] = None
16
+
17
+ def __post_init__(self):
18
+ """Validate action consistency."""
19
+ if self.action_type == "fix" and self.content is None:
20
+ raise ValueError("fix action requires content")
21
+
22
+
23
@dataclass
class DebugObservation(Observation):
    """Observations returned to the agent after each step."""

    # The problem's buggy source code as shown to the agent.
    buggy_code: str
    # Exact stdout a correct fix is expected to print.
    expected_output: str
    # Reserved for stdin-style test inputs; presumably empty for current
    # problems — confirm against the problem database.
    test_inputs: list[str] = field(default_factory=list)
    # stdout captured from the most recent code execution, if any.
    current_output: Optional[str] = None
    # Error text from the last action (exception info or protocol error).
    error_message: Optional[str] = None
    # Attempts left before the episode ends.
    attempts_remaining: int = 5
    # Problem hint, revealed only after repeated failed attempts.
    hint: Optional[str] = None
    # True once a submitted fix reproduces the expected output.
    success: bool = False
35
+
36
+
37
@dataclass
class DebugState(State):
    """Internal state tracking for the environment."""

    # Index into the problem database. NOTE(review): appears never updated by
    # the environment (problems are drawn at random) — confirm before relying
    # on it.
    current_problem_index: int = 0
    # Number of steps taken in the current episode.
    attempts_made: int = 0
    # Episode ends after this many attempts.
    max_attempts: int = 5
    # Cumulative reward credit accumulated this episode.
    score: float = 0.0
    # Whether the current problem has been solved.
    solved: bool = False
    # Size of the problem database.
    total_problems: int = 7
    # Unique identifier (UUID string) for the episode.
    episode_id: str = ""
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "code-debug-env"
version = "1.0.0"
description = "OpenEnv environment for training agents to debug Python code"
readme = "README.md"
# Declared minimum interpreter version for the package.
requires-python = ">=3.10"
license = {text = "Apache-2.0"}
keywords = ["openenv", "reinforcement-learning", "debugging", "ai-agents"]

# Runtime dependency: the official OpenEnv framework.
dependencies = [
    "openenv-core>=0.1.1",
]

[project.optional-dependencies]
# Development-only tools (test runner, formatter, linter).
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "black>=23.0.0",
    "ruff>=0.1.0",
]

[build-system]
# NOTE(review): hatchling may need an explicit package/include configuration,
# since the modules (models.py, client.py, server/) sit at the repository
# root — confirm the built wheel actually contains them.
requires = ["hatchling"]
build-backend = "hatchling.build"
server/Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ghcr.io/meta-pytorch/openenv-base:latest AS base

WORKDIR /app

# Copy project metadata and sources, then install the package.
COPY pyproject.toml README.md ./
COPY models.py ./
COPY client.py ./
COPY server/ ./server/

RUN pip install --no-cache-dir -e .

EXPOSE 8000

# Run from /app (NOT /app/server) so `server` is importable as a package:
# server/app.py uses a relative import (`from ..models import ...`) which
# cannot resolve when app.py is launched as a top-level module `app`.
# NOTE(review): the relative import still requires the modules to live inside
# an installed parent package — confirm the packaging layout matches.
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
server/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Code Debugging Challenge environment server."""
2
+
3
+ from .debug_environment import DebugEnvironment
4
+
5
+ __all__ = ["DebugEnvironment"]
server/app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI server for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from openenv.core.env_server import create_app
6
+ from ..models import DebugAction, DebugObservation
7
+ from .debug_environment import DebugEnvironment
8
+
9
+
10
def create_debug_environment():
    """Factory invoked by the OpenEnv server for each new environment instance."""
    return DebugEnvironment()


# Module-level ASGI app so uvicorn can locate it via an import string.
# create_app wires the factory plus the action/observation types into the
# OpenEnv server protocol.
# NOTE(review): this module imports models via a relative `..models` import;
# that only resolves when `server` is a subpackage of an installed parent
# package — confirm the packaging matches how the Dockerfile launches uvicorn.
app = create_app(
    create_debug_environment,
    DebugAction,
    DebugObservation,
    env_name="code_debug_env"
)


if __name__ == "__main__":
    # Convenience entry point for running the server without Docker.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
server/debug_environment.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core environment logic for Code Debugging Challenge.
3
+ """
4
+
5
+ import uuid
6
+ import random
7
+ import sys
8
+ from io import StringIO
9
+ from typing import Optional
10
+ from openenv.core.env_server import Environment
11
+ from ..models import DebugAction, DebugObservation, DebugState
12
+
13
+
14
# Problem bank for the environment. Each entry is a dict with:
#   buggy_code       - snippet shown to the agent
#   fixed_code       - a known-good reference solution (used by the tests)
#   expected_output  - exact stdout (compared after .strip()) of a correct fix
#   test_inputs      - reserved for stdin-style inputs; empty for all entries
#   hint             - revealed to the agent after repeated failed attempts
#   bug_type         - category label for the kind of bug
# NOTE(review): the embedded snippets appear whitespace-collapsed by the diff
# viewer (single-space indents) — verify their indentation in the actual file.
BUG_DATABASE = [
    # 1. argument_count: call site passes too few arguments.
    {
        "buggy_code": "def add_numbers(a, b):\n return a + b\n\nresult = add_numbers(5)\nprint(result)",
        "fixed_code": "def add_numbers(a, b):\n return a + b\n\nresult = add_numbers(5, 3)\nprint(result)",
        "expected_output": "8",
        "test_inputs": [],
        "hint": "Function is called with wrong number of arguments",
        "bug_type": "argument_count"
    },
    # 2. logic_error: loop sums indices instead of list elements.
    {
        "buggy_code": "numbers = [1, 2, 3, 4, 5]\ntotal = 0\nfor i in range(len(numbers)):\n total += i\nprint(total)",
        "fixed_code": "numbers = [1, 2, 3, 4, 5]\ntotal = 0\nfor i in range(len(numbers)):\n total += numbers[i]\nprint(total)",
        "expected_output": "15",
        "test_inputs": [],
        "hint": "Loop variable is not being used correctly",
        "bug_type": "logic_error"
    },
    # 3. exception_handling: division by zero is not guarded.
    {
        "buggy_code": "def divide(a, b):\n return a / b\n\nprint(divide(10, 0))",
        "fixed_code": "def divide(a, b):\n if b == 0:\n return 'Error: Division by zero'\n return a / b\n\nprint(divide(10, 0))",
        "expected_output": "Error: Division by zero",
        "test_inputs": [],
        "hint": "Need to handle edge case when dividing by zero",
        "bug_type": "exception_handling"
    },
    # 4. index_error: string index past the end.
    {
        "buggy_code": "text = 'Hello World'\nprint(text[100])",
        "fixed_code": "text = 'Hello World'\nif len(text) > 100:\n print(text[100])\nelse:\n print('Index out of range')",
        "expected_output": "Index out of range",
        "test_inputs": [],
        "hint": "Index is out of bounds for the string",
        "bug_type": "index_error"
    },
    # 5. infinite_recursion: recursive call never shrinks its argument.
    {
        "buggy_code": "def factorial(n):\n if n == 0:\n return 1\n return n * factorial(n)\n\nprint(factorial(5))",
        "fixed_code": "def factorial(n):\n if n == 0:\n return 1\n return n * factorial(n - 1)\n\nprint(factorial(5))",
        "expected_output": "120",
        "test_inputs": [],
        "hint": "Recursive call is not reducing the problem size",
        "bug_type": "infinite_recursion"
    },
    # 6. type_error: str + int concatenation.
    {
        "buggy_code": "name = 'Alice'\nage = 25\nprint('My name is ' + name + ' and I am ' + age + ' years old')",
        "fixed_code": "name = 'Alice'\nage = 25\nprint('My name is ' + name + ' and I am ' + str(age) + ' years old')",
        "expected_output": "My name is Alice and I am 25 years old",
        "test_inputs": [],
        "hint": "Cannot concatenate string and integer directly",
        "bug_type": "type_error"
    },
    # 7. key_error: lookup of a missing dictionary key.
    {
        "buggy_code": "my_dict = {'a': 1, 'b': 2}\nprint(my_dict['c'])",
        "fixed_code": "my_dict = {'a': 1, 'b': 2}\nprint(my_dict.get('c', 'Key not found'))",
        "expected_output": "Key not found",
        "test_inputs": [],
        "hint": "Key does not exist in dictionary",
        "bug_type": "key_error"
    },
]
73
+
74
+
75
class DebugEnvironment(Environment):
    """Code Debugging Challenge Environment.

    Serves one randomly chosen buggy Python snippet per episode and scores
    the agent's attempts to analyze, test, fix, and submit a solution.
    """

    supports_concurrent_sessions = True

    def __init__(self):
        super().__init__()
        self._state = DebugState(
            episode_id=str(uuid.uuid4()),
            total_problems=len(BUG_DATABASE)
        )
        # Populated by reset(); step() is invalid until then.
        self.current_problem = None

    def reset(self) -> DebugObservation:
        """Start a fresh episode with a new random problem."""
        self._state = DebugState(
            episode_id=str(uuid.uuid4()),
            total_problems=len(BUG_DATABASE)
        )
        self.current_problem = random.choice(BUG_DATABASE)
        return self._make_observation(attempts_remaining=self._state.max_attempts)

    def step(self, action: DebugAction) -> DebugObservation:
        """Execute one step; every action (even analyze) consumes an attempt."""
        if self.current_problem is None:
            # Fail loudly instead of the obscure TypeError that subscripting
            # None would otherwise raise deep inside a handler.
            raise RuntimeError("reset() must be called before step()")

        self._state.attempts_made += 1

        if action.action_type == "analyze":
            return self._handle_analyze()
        if action.action_type == "fix":
            return self._handle_fix(action.content)
        if action.action_type == "test":
            return self._handle_test()
        if action.action_type == "submit":
            return self._handle_submit()
        return self._make_observation(error_message="Invalid action type")

    def _make_observation(self, **overrides) -> DebugObservation:
        """Build an observation for the current problem.

        Defaults describe an ordinary in-progress step; keyword overrides
        replace individual fields (output, errors, hint, success, ...).
        """
        fields = {
            "buggy_code": self.current_problem["buggy_code"],
            "expected_output": self.current_problem["expected_output"],
            "test_inputs": self.current_problem.get("test_inputs", []),
            "attempts_remaining": self._state.max_attempts - self._state.attempts_made,
            "success": False,
        }
        fields.update(overrides)
        return DebugObservation(**fields)

    def _handle_analyze(self) -> DebugObservation:
        """Analyze action: re-present the problem without running anything."""
        return self._make_observation()

    def _handle_fix(self, code_fix: Optional[str]) -> DebugObservation:
        """Fix action: execute the proposed code and compare its output."""
        if code_fix is None:
            return self._make_observation(error_message="No fix provided")

        output, error = self._execute_code(code_fix)
        # Reveal the hint only after the agent has used at least two attempts.
        hint = self.current_problem["hint"] if self._state.attempts_made >= 2 else None

        if error:
            return self._make_observation(
                current_output=output,
                error_message=error,
                hint=hint,
            )

        if output.strip() == self.current_problem["expected_output"].strip():
            self._state.solved = True
            self._state.score += 1.0
            return self._make_observation(current_output=output, success=True)

        return self._make_observation(
            current_output=output,
            error_message=f"Output mismatch. Got: {output.strip()}, Expected: {self.current_problem['expected_output'].strip()}",
            hint=hint,
        )

    def _handle_test(self) -> DebugObservation:
        """Test action: run the buggy code so the agent can see the failure."""
        output, error = self._execute_code(self.current_problem["buggy_code"])
        return self._make_observation(current_output=output, error_message=error)

    def _handle_submit(self) -> DebugObservation:
        """Submit action: give up early; attempts_remaining=0 marks it."""
        return self._make_observation(attempts_remaining=0)

    def _execute_code(self, code: str) -> tuple[str, Optional[str]]:
        """Execute *code*, capturing stdout.

        Returns (captured_output, error) where error is None on success or
        "ExceptionType: message" when the code raised.

        SECURITY: exec() runs agent-supplied code with full interpreter
        privileges; the surrounding Docker container is the only sandbox.
        """
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            exec(code, {})
            output = sys.stdout.getvalue()
            error = None
        except Exception as e:
            output = sys.stdout.getvalue()
            error = f"{type(e).__name__}: {str(e)}"
        finally:
            sys.stdout = old_stdout
        return output, error

    @property
    def state(self) -> DebugState:
        """Return current environment state."""
        return self._state

    def reward(self, observation: DebugObservation) -> float:
        """Compute reward based on observation.

        Specific cases are checked before generic ones. In the previous
        ordering two branches were unreachable: "No fix provided" (-0.5) was
        shadowed by the generic error check (-0.3), and a premature submit
        observation (no output, no error, attempts_remaining=0) fell into the
        +0.1 analyze branch instead of the documented -1.0 penalty.
        """
        if observation.success:
            return 1.0
        if observation.error_message == "No fix provided":
            return -0.5
        # Failed fix whose error text is not an expected "Error: ..." output.
        if observation.error_message and "Error:" not in observation.error_message:
            return -0.3
        # Code ran and produced (wrong) output.
        if observation.current_output and not observation.success:
            return -0.2
        # Premature submission, or attempts exhausted without a solution.
        if observation.attempts_remaining == 0 and not observation.success:
            return -1.0
        # Quiet informational step (analysis).
        if observation.current_output is None and observation.error_message is None:
            return 0.1
        return 0.0

    def terminated(self, observation: DebugObservation) -> bool:
        """Episode terminates on success or max attempts."""
        return observation.success or self._state.attempts_made >= self._state.max_attempts

    def truncated(self, observation: DebugObservation) -> bool:
        """Episode is truncated if max attempts reached without success."""
        return (self._state.attempts_made >= self._state.max_attempts
                and not observation.success)
tests/test_environment.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for Code Debugging Challenge environment."""
2
+
3
+ import pytest
4
+ from server.debug_environment import DebugEnvironment
5
+ from models import DebugAction
6
+
7
+
8
def test_environment_reset():
    """A fresh reset yields an unsolved problem with all attempts available."""
    environment = DebugEnvironment()
    observation = environment.reset()

    assert observation.buggy_code is not None
    assert observation.expected_output is not None
    assert observation.attempts_remaining == 5
    assert not observation.success
17
+
18
+
19
def test_analyze_action():
    """Analyzing earns the small positive reward and keeps the episode open."""
    environment = DebugEnvironment()
    environment.reset()

    observation = environment.step(DebugAction(action_type="analyze"))

    assert environment.reward(observation) == 0.1
    assert not environment.terminated(observation)
30
+
31
+
32
def test_successful_fix():
    """Submitting the known-good reference fix solves and ends the episode."""
    environment = DebugEnvironment()
    environment.reset()

    reference_fix = environment.current_problem["fixed_code"]
    observation = environment.step(
        DebugAction(action_type="fix", content=reference_fix)
    )

    assert observation.success
    assert environment.reward(observation) == 1.0
    assert environment.terminated(observation)
45
+
46
+
47
# Allow running this file directly (`python test_environment.py`) in addition
# to discovery via `pytest`.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])