Viraaj Sawant commited on
Commit
8a4b89f
·
0 Parent(s):

Initial push of Mini RL Env

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +8 -0
  2. prompts.py +37 -0
  3. requirements.txt +26 -0
  4. rl_code_fix_env/.dockerignore +45 -0
  5. rl_code_fix_env/.gitignore +8 -0
  6. rl_code_fix_env/README.md +255 -0
  7. rl_code_fix_env/__init__.py +14 -0
  8. rl_code_fix_env/client.py +185 -0
  9. rl_code_fix_env/conftest.py +38 -0
  10. rl_code_fix_env/dataset/README.md +20 -0
  11. rl_code_fix_env/dataset/__init__.py +1 -0
  12. rl_code_fix_env/dataset/loader.py +111 -0
  13. rl_code_fix_env/dataset/problem_1/buggy.py +5 -0
  14. rl_code_fix_env/dataset/problem_1/metadata.json +5 -0
  15. rl_code_fix_env/dataset/problem_1/test.py +14 -0
  16. rl_code_fix_env/dataset/problem_10/buggy.py +8 -0
  17. rl_code_fix_env/dataset/problem_10/helpers.py +2 -0
  18. rl_code_fix_env/dataset/problem_10/metadata.json +5 -0
  19. rl_code_fix_env/dataset/problem_10/test.py +12 -0
  20. rl_code_fix_env/dataset/problem_11/buggy.py +14 -0
  21. rl_code_fix_env/dataset/problem_11/metadata.json +5 -0
  22. rl_code_fix_env/dataset/problem_11/test.py +17 -0
  23. rl_code_fix_env/dataset/problem_12/buggy.py +11 -0
  24. rl_code_fix_env/dataset/problem_12/metadata.json +5 -0
  25. rl_code_fix_env/dataset/problem_12/test.py +14 -0
  26. rl_code_fix_env/dataset/problem_13/buggy.py +10 -0
  27. rl_code_fix_env/dataset/problem_13/cache.py +20 -0
  28. rl_code_fix_env/dataset/problem_13/metadata.json +5 -0
  29. rl_code_fix_env/dataset/problem_13/test.py +13 -0
  30. rl_code_fix_env/dataset/problem_14/buggy.py +6 -0
  31. rl_code_fix_env/dataset/problem_14/metadata.json +5 -0
  32. rl_code_fix_env/dataset/problem_14/test.py +15 -0
  33. rl_code_fix_env/dataset/problem_15/buggy.py +4 -0
  34. rl_code_fix_env/dataset/problem_15/metadata.json +5 -0
  35. rl_code_fix_env/dataset/problem_15/test.py +14 -0
  36. rl_code_fix_env/dataset/problem_16/buggy.py +10 -0
  37. rl_code_fix_env/dataset/problem_16/helpers.py +3 -0
  38. rl_code_fix_env/dataset/problem_16/metadata.json +5 -0
  39. rl_code_fix_env/dataset/problem_16/test.py +12 -0
  40. rl_code_fix_env/dataset/problem_17/buggy.py +11 -0
  41. rl_code_fix_env/dataset/problem_17/metadata.json +5 -0
  42. rl_code_fix_env/dataset/problem_17/test.py +11 -0
  43. rl_code_fix_env/dataset/problem_18/buggy.py +14 -0
  44. rl_code_fix_env/dataset/problem_18/math_utils.py +6 -0
  45. rl_code_fix_env/dataset/problem_18/metadata.json +5 -0
  46. rl_code_fix_env/dataset/problem_18/test.py +14 -0
  47. rl_code_fix_env/dataset/problem_19/buggy.py +36 -0
  48. rl_code_fix_env/dataset/problem_19/metadata.json +5 -0
  49. rl_code_fix_env/dataset/problem_19/test.py +48 -0
  50. rl_code_fix_env/dataset/problem_2/buggy.py +5 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.pdf
2
+ venv/
3
+ .venv/
4
+ __pycache__/
5
+ .env
6
+ commands.md
7
+ logs.md
8
+ inference&docker.md
prompts.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Prompt templates for the LLM-based reward scorer.
#
# LLM_SCORER_PROMPT is the system prompt; USER_TEMPLATE is filled with the
# original buggy code and the agent's patched code and sent as the user
# message. The scorer must answer with the JSON schema described below.

LLM_SCORER_PROMPT = """
You are a reward model for an autonomous code bug-fixing agent trained with reinforcement learning.
Your scores are used directly as a learning signal — be precise, consistent, and strict.

You will receive:
- ORIGINAL: the buggy code before the agent's fix
- PATCHED: the code after the agent applied its patch

Evaluate the agent's fix on exactly three axes, each scored 0.0–10.0:

1. CORRECTNESS — Does the patch fix the bug(s) without introducing new ones?
Full marks only if the fix is semantically correct and complete.
Penalise partial fixes, over-patches, or fixes that mask rather than resolve the root cause.

2. MINIMALITY — Is the diff minimal? Penalise unnecessary refactors, renames, whitespace-only changes,
or reformatting of lines unrelated to the bug.

3. QUALITY — Is the patched code readable and idiomatic? Penalise: broken naming conventions,
added dead code, removed necessary comments, or degraded clarity vs. the original.

Respond ONLY with this JSON — no preamble, no trailing text:
{
"correctness": <float 0.0-10.0>,
"minimality": <float 0.0-10.0>,
"quality": <float 0.0-10.0>,
"reasoning": "<one concise sentence per axis, pipe-separated>"
}
"""


# BUG FIX: the system prompt promises the scorer both ORIGINAL and PATCHED,
# but the original template only interpolated {original_code}. Without the
# patched code the reward model can never see what the agent changed, so
# every score would be meaningless. A PATCHED section is added here.
USER_TEMPLATE = """
ORIGINAL:
```python
{original_code}
```

PATCHED:
```python
{patched_code}
```
Return only the JSON.
"""
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ datasets
3
+ gymnasium
4
+ rich
5
+ tqdm
6
+ matplotlib
7
+ seaborn
8
+
9
+ pandas
10
+ numpy
11
+ openenv-core
12
+ fastapi
13
+ requests
14
+ uvicorn
15
+ pydantic
16
+ streamlit
17
+
18
+ groq
19
+ langchain
20
+ langchain-core
21
+ huggingface_hub
22
+
23
+ loguru
24
+ pytest
25
+ unidiff
26
+ diff-match-patch
rl_code_fix_env/.dockerignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual environments (Windows/Linux/Mac)
2
+ .venv/
3
+ venv/
4
+ env/
5
+ ENV/
6
+ .env
7
+
8
+ # Python cache & compiled
9
+ __pycache__/
10
+ *.pyc
11
+ *.pyo
12
+ *.pyd
13
+ .Python
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+ *.egg
18
+ venv
19
+ .venv
20
+
21
+ # Testing & coverage
22
+ .pytest_cache/
23
+ .coverage
24
+ htmlcov/
25
+
26
+ # IDE & editor
27
+ .vscode/
28
+ .idea/
29
+ *.swp
30
+ *.swo
31
+ *~
32
+ .DS_Store
33
+
34
+ # Version control
35
+ .git/
36
+ .gitignore
37
+
38
+ # Build/cache
39
+ .mypy_cache/
40
+ *.log
41
+
42
+ # Docker
43
+ Dockerfile
44
+ .dockerignore
45
+ docker-compose.yml
rl_code_fix_env/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.pdf
2
+ venv/
3
+ .venv/
4
+ __pycache__/
5
+ .env
6
+ *.pyc
7
+ *.egg
8
+ pytest-cache-files-*/
rl_code_fix_env/README.md ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Rl Code Fix Env Environment Server
3
+ emoji:
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ ---
13
+
14
+ # Rl Code Fix Env Environment
15
+
16
+ A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
17
+
18
+ ## Quick Start
19
+
20
+ The simplest way to use the Rl Code Fix Env environment is through the `RlCodeFixEnv` class:
21
+
22
+ ```python
23
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
24
+
25
+ try:
26
+ # Create environment from Docker image
27
+ rl_code_fix_envenv = RlCodeFixEnv.from_docker_image("rl_code_fix_env-env:latest")
28
+
29
+ # Reset
30
+ result = rl_code_fix_envenv.reset()
31
+ print(f"Reset: {result.observation.echoed_message}")
32
+
33
+ # Send multiple messages
34
+ messages = ["Hello, World!", "Testing echo", "Final message"]
35
+
36
+ for msg in messages:
37
+ result = rl_code_fix_envenv.step(RlCodeFixAction(message=msg))
38
+ print(f"Sent: '{msg}'")
39
+ print(f" Echoed: '{result.observation.echoed_message}'")
40
+ print(f" Length: {result.observation.message_length}")
41
+ print(f" Reward: {result.reward}")
42
+
43
+ finally:
44
+ # Always clean up
45
+ rl_code_fix_envenv.close()
46
+ ```
47
+
48
+ That's it! The `RlCodeFixEnv.from_docker_image()` method handles:
49
+ - Starting the Docker container
50
+ - Waiting for the server to be ready
51
+ - Connecting to the environment
52
+ - Container cleanup when you call `close()`
53
+
54
+ ## Building the Docker Image
55
+
56
+ Before using the environment, you need to build the Docker image:
57
+
58
+ ```bash
59
+ # From project root
60
+ docker build -t rl_code_fix_env-env:latest -f server/Dockerfile .
61
+ ```
62
+
63
+ ## Deploying to Hugging Face Spaces
64
+
65
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
66
+
67
+ ```bash
68
+ # From the environment directory (where openenv.yaml is located)
69
+ openenv push
70
+
71
+ # Or specify options
72
+ openenv push --namespace my-org --private
73
+ ```
74
+
75
+ The `openenv push` command will:
76
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
77
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
78
+ 3. Upload to Hugging Face (ensuring you're logged in)
79
+
80
+ ### Prerequisites
81
+
82
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
83
+
84
+ ### Options
85
+
86
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
87
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
88
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
89
+ - `--private`: Deploy the space as private (default: public)
90
+
91
+ ### Examples
92
+
93
+ ```bash
94
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
95
+ openenv push
96
+
97
+ # Push to a specific repository
98
+ openenv push --repo-id my-org/my-env
99
+
100
+ # Push with a custom base image
101
+ openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
102
+
103
+ # Push as a private space
104
+ openenv push --private
105
+
106
+ # Combine options
107
+ openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
108
+ ```
109
+
110
+ After deployment, your space will be available at:
111
+ `https://huggingface.co/spaces/<repo-id>`
112
+
113
+ The deployed space includes:
114
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
115
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
116
+ - **Health Check** at `/health` - Container health monitoring
117
+ - **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
118
+
119
+ ## Environment Details
120
+
121
+ ### Action
122
+ **RlCodeFixAction**: Contains a single field
123
+ - `message` (str) - The message to echo back
124
+
125
+ ### Observation
126
+ **RlCodeFixObservation**: Contains the echo response and metadata
127
+ - `echoed_message` (str) - The message echoed back
128
+ - `message_length` (int) - Length of the message
129
+ - `reward` (float) - Reward based on message length (`message_length × 0.1`)
130
+ - `done` (bool) - Always False for echo environment
131
+ - `metadata` (dict) - Additional info like step count
132
+
133
+ ### Reward
134
+ The reward is calculated as: `message_length × 0.1`
135
+ - "Hi" reward: 0.2
136
+ - "Hello, World!" reward: 1.3
137
+ - Empty message reward: 0.0
138
+
139
+ ## Advanced Usage
140
+
141
+ ### Connecting to an Existing Server
142
+
143
+ If you already have a Rl Code Fix Env environment server running, you can connect directly:
144
+
145
+ ```python
146
+ from rl_code_fix_env import RlCodeFixEnv
147
+
148
+ # Connect to existing server
149
+ rl_code_fix_envenv = RlCodeFixEnv(base_url="<ENV_HTTP_URL_HERE>")
150
+
151
+ # Use as normal
152
+ result = rl_code_fix_envenv.reset()
153
+ result = rl_code_fix_envenv.step(RlCodeFixAction(message="Hello!"))
154
+ ```
155
+
156
+ Note: When connecting to an existing server, `rl_code_fix_envenv.close()` will NOT stop the server.
157
+
158
+ ### Using the Context Manager
159
+
160
+ The client supports context manager usage for automatic connection management:
161
+
162
+ ```python
163
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
164
+
165
+ # Connect with context manager (auto-connects and closes)
166
+ with RlCodeFixEnv(base_url="http://localhost:8000") as env:
167
+ result = env.reset()
168
+ print(f"Reset: {result.observation.echoed_message}")
169
+ # Multiple steps with low latency
170
+ for msg in ["Hello", "World", "!"]:
171
+ result = env.step(RlCodeFixAction(message=msg))
172
+ print(f"Echoed: {result.observation.echoed_message}")
173
+ ```
174
+
175
+ The client uses WebSocket connections for:
176
+ - **Lower latency**: No HTTP connection overhead per request
177
+ - **Persistent session**: Server maintains your environment state
178
+ - **Efficient for episodes**: Better for many sequential steps
179
+
180
+ ### Concurrent WebSocket Sessions
181
+
182
+ The server supports multiple concurrent WebSocket connections. To enable this,
183
+ modify `server/app.py` to use factory mode:
184
+
185
+ ```python
186
+ # In server/app.py - use factory mode for concurrent sessions
187
+ app = create_app(
188
+ RlCodeFixEnvironment, # Pass class, not instance
189
+ RlCodeFixAction,
190
+ RlCodeFixObservation,
191
+ max_concurrent_envs=4, # Allow 4 concurrent sessions
192
+ )
193
+ ```
194
+
195
+ Then multiple clients can connect simultaneously:
196
+
197
+ ```python
198
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
199
+ from concurrent.futures import ThreadPoolExecutor
200
+
201
+ def run_episode(client_id: int):
202
+ with RlCodeFixEnv(base_url="http://localhost:8000") as env:
203
+ result = env.reset()
204
+ for i in range(10):
205
+ result = env.step(RlCodeFixAction(message=f"Client {client_id}, step {i}"))
206
+ return client_id, result.observation.message_length
207
+
208
+ # Run 4 episodes concurrently
209
+ with ThreadPoolExecutor(max_workers=4) as executor:
210
+ results = list(executor.map(run_episode, range(4)))
211
+ ```
212
+
213
+ ## Development & Testing
214
+
215
+ ### Direct Environment Testing
216
+
217
+ Test the environment logic directly without starting the HTTP server:
218
+
219
+ ```bash
220
+ # From the server directory
221
+ python3 server/rl_code_fix_env_environment.py
222
+ ```
223
+
224
+ This verifies that:
225
+ - Environment resets correctly
226
+ - Step executes actions properly
227
+ - State tracking works
228
+ - Rewards are calculated correctly
229
+
230
+ ### Running Locally
231
+
232
+ Run the server locally for development:
233
+
234
+ ```bash
235
+ uvicorn server.app:app --reload
236
+ ```
237
+
238
+ ## Project Structure
239
+
240
+ ```
241
+ rl_code_fix_env/
242
+ .dockerignore # Docker build exclusions
243
+ __init__.py # Module exports
244
+ README.md # This file
245
+ openenv.yaml # OpenEnv manifest
246
+ pyproject.toml # Project metadata and dependencies
247
+ uv.lock # Locked dependencies (generated)
248
+ client.py # RlCodeFixEnv client
249
+ models.py # Action and Observation models
250
+ server/
251
+ __init__.py # Server module exports
252
+ rl_code_fix_env_environment.py # Core environment logic
253
+ app.py # FastAPI application (HTTP + WebSocket endpoints)
254
+ Dockerfile # Container image definition
255
+ ```
rl_code_fix_env/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Rl Code Fix Env Environment."""
8
+
9
+ from .models import CodeFixerAction, CodeFixerObservation
10
+
11
+ __all__ = [
12
+ "CodeFixerAction",
13
+ "CodeFixerObservation",
14
+ ]
rl_code_fix_env/client.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Code Fixer Environment Client."""

import asyncio
import inspect
import logging
from typing import Dict

from openenv.core import EnvClient
from openenv.core.client_types import StepResult
from openenv.core.env_server.types import State

from rl_code_fix_env.models import CodeFixerAction, CodeFixerObservation

log = logging.getLogger(__name__)

class CodeFixerEnv(
    EnvClient[CodeFixerAction, CodeFixerObservation, State]
):
    """
    Client for the Code Fixer Environment.

    This client maintains a persistent WebSocket connection to the environment server,
    enabling efficient multi-step interactions with lower latency.
    Each client instance has its own dedicated environment session on the server.

    It wraps the (possibly asynchronous) base EnvClient in a synchronous API
    by driving coroutines on a private event loop, and transparently
    reconnects once when the WebSocket dies after an idle period.

    Example:
        >>> # Connect to a running server
        >>> with CodeFixerEnv(base_url="http://localhost:8000") as client:
        ...     result = client.reset()
        ...     print(result.observation.code)
        ...
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ...     print(result.observation.test_passed)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = CodeFixerEnv.from_docker_image("code_fixer-env:latest")
        >>> try:
        ...     result = client.reset()
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ... finally:
        ...     client.close()
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Private event loop: base-class methods may return coroutines, and
        # _run_sync() drives them here so callers get a plain blocking API.
        self._loop = asyncio.new_event_loop()
        # Store init args for reconnection (replayed in _reconnect()).
        self._init_args = args
        self._init_kwargs = kwargs

    def _run_sync(self, result):
        """Run coroutine results on this client's dedicated event loop."""
        # The base class may be sync or async depending on its version; only
        # spin the loop when we actually received a coroutine.
        if inspect.iscoroutine(result):
            return self._loop.run_until_complete(result)
        return result

    def _reconnect(self) -> None:
        """
        Tear down the dead event loop and WebSocket connection, then
        re-initialise so the next call works cleanly.

        Called automatically by reset() and step() when a 1011 / timeout
        error is detected after an idle period.
        """
        log.warning("[CodeFixerEnv] WebSocket timed out reconnecting...")
        # Close the old loop gracefully; the connection may already be dead,
        # so any error here is expected and deliberately swallowed.
        try:
            self._run_sync(super().close())
        except Exception:
            pass
        if not self._loop.is_closed():
            self._loop.close()

        # Re-initialise: fresh loop + fresh base-class state.
        # NOTE(review): super().__init__ here also re-runs our own attribute
        # setup only via the base class — _init_args/_init_kwargs survive
        # because they are replayed as the constructor arguments.
        self._loop = asyncio.new_event_loop()
        super().__init__(*self._init_args, **self._init_kwargs)
        log.warning("[CodeFixerEnv] Reconnected successfully.")

    @staticmethod
    def _is_reconnectable_ws_error(exc: Exception) -> bool:
        # Heuristic, substring-based classification of "the socket died"
        # errors (close codes 1011/1006, keepalive timeouts, closed frames).
        # Broad markers like "closed" may over-match; acceptable here since
        # the worst case is one extra reconnect attempt.
        err = str(exc).lower()
        reconnect_markers = (
            "1011",
            "1006",
            "keepalive",
            "timed out",
            "closed",
            "close frame",
            "connection closed",
            "connectionclosed",
            "websocket",
        )
        return any(marker in err for marker in reconnect_markers)

    def reset(self):
        """Reset the environment auto-reconnects if the WebSocket died."""
        try:
            return self._run_sync(super().reset())
        except Exception as exc:
            if self._is_reconnectable_ws_error(exc):
                self._reconnect()
                return self._run_sync(super().reset())  # one retry
            raise

    def step(self, action: CodeFixerAction):
        """Execute a step auto-reconnects if the WebSocket died."""
        try:
            return self._run_sync(super().step(action))
        except Exception as exc:
            if self._is_reconnectable_ws_error(exc):
                self._reconnect()
                return self._run_sync(super().step(action))  # one retry
            raise

    def close(self):
        """Close client resources and the dedicated event loop safely."""
        try:
            self._run_sync(super().close())
        finally:
            # Always release the loop, even if the server-side close failed.
            if not self._loop.is_closed():
                self._loop.close()

    def _step_payload(self, action: CodeFixerAction) -> Dict:
        """
        Convert CodeFixerAction to JSON payload for step message.

        Args:
            action: CodeFixerAction instance

        Returns:
            Dictionary representation suitable for JSON encoding
        """
        return {
            "type": action.type,
            "payload": action.payload,
        }

    def _parse_result(self, payload: Dict) -> StepResult[CodeFixerObservation]:
        """
        Parse server response into StepResult[CodeFixerObservation].

        Args:
            payload: JSON response data from server

        Returns:
            StepResult with CodeFixerObservation
        """
        obs_data = payload.get("observation", {})
        # done/reward fall back to the top-level payload when the server
        # does not embed them inside the observation dict.
        observation = CodeFixerObservation(
            code=obs_data.get("code", ""),
            logs=obs_data.get("logs"),
            test_score=float(obs_data.get("test_score", 0.0)),
            total_tests=obs_data.get("total_tests", 1),
            steps=obs_data.get("steps", 0),
            done=obs_data.get("done", payload.get("done", False)),
            reward=obs_data.get("reward", payload.get("reward")),
        )

        return StepResult(
            observation=observation,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict) -> State:
        """
        Parse server response into State object.

        Args:
            payload: JSON response from state request

        Returns:
            State object with episode_id and step_count
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
rl_code_fix_env/conftest.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
conftest.py — repo-root pytest configuration.

Registers `src.dataset` as a sys.modules alias for `dataset` so that all
problem test files using `from src.dataset.problem_X.buggy import ...`
resolve correctly without needing to rename 24 test files.

The physical layout is:
    <repo_root>/dataset/problem_X/buggy.py   <- the real files
    <repo_root>/src/                         <- has environment/, reward/, etc.,
                                                but NO dataset/ subfolder

With PYTHONPATH=<repo_root>:
    `import dataset.problem_1.buggy` works natively;
    `import src.dataset.problem_1.buggy` would fail — fixed here via alias.
"""

import sys
import importlib
from pathlib import Path

# Make the repo root importable regardless of where pytest is invoked from.
_REPO_ROOT = str(Path(__file__).parent)
if _REPO_ROOT not in sys.path:
    sys.path.insert(0, _REPO_ROOT)

import dataset as _real_dataset

# Alias the package itself: any lookup of "src.dataset" now resolves to the
# real top-level "dataset" package. setdefault keeps an existing entry if
# something else registered one first.
sys.modules.setdefault("src.dataset", _real_dataset)

# Pre-register every problem_X subpackage under the "src.dataset." alias so
# that submodule imports (e.g. src.dataset.problem_1.buggy) resolve through
# the already-imported real packages.
import pkgutil
for _pkg in pkgutil.iter_modules(_real_dataset.__path__):
    _full = f"dataset.{_pkg.name}"
    _alias = f"src.dataset.{_pkg.name}"
    try:
        _mod = importlib.import_module(_full)
        sys.modules.setdefault(_alias, _mod)
    except Exception:
        # Best-effort: a broken problem package shouldn't take down the
        # whole test session at collection time.
        pass
rl_code_fix_env/dataset/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Buggy Training Dataset
2
+
3
+ This dataset is organized as:
4
+
5
+ - `problem_x/buggy.py`: intentionally buggy implementation
6
+ - `problem_x/test.py`: correctness tests that should fail before fixes
7
+ - optional extra modules (`helpers.py`, `cache.py`, etc.) to support multi-file bug fixing
8
+
9
+ Current problems: `problem_1` to `problem_19`.
10
+
11
+ Bug patterns included:
12
+ - off-by-one errors
13
+ - boundary condition mistakes
14
+ - incorrect sorting direction
15
+ - exception handling mistakes
16
+ - state/recency bugs in cache logic
17
+ - recursive base-case bugs
18
+ - parsing and whitespace normalization issues
19
+ - order-preservation regressions
20
+ - matrix transformation direction errors
rl_code_fix_env/dataset/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Dataset loading modules."""
rl_code_fix_env/dataset/loader.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load static, competition-approved tasks."""

import os
import json
from pathlib import Path
from typing import Any, Dict, List, Optional

# Get the dataset root (same folder as this file); problem_* folders live here.
DATASET_ROOT = Path(__file__).parent

# Hardcoded competition tasks: Easy / Medium / Hard.
STATIC_TASKS = {
    "easy": {
        "problem_id": "problem_1",
        "difficulty": "easy",
        "description": "String reversal with space normalization",
    },
    "medium": {
        "problem_id": "problem_10",
        "difficulty": "medium",
        "description": "Matrix 90 clockwise rotation",
    },
    "hard": {
        "problem_id": "problem_13",
        "difficulty": "hard",
        "description": "LRU cache with correct eviction policy",
    },
}


def load_problem(problem_id: str) -> Dict[str, Any]:
    """
    Load a single problem from disk.

    Args:
        problem_id: e.g., "problem_1", "problem_10", "problem_13"

    Returns:
        {
            "code": str,         # buggy.py content
            "tests": str,        # absolute path to the problem's test.py
            "metadata": dict,    # parsed metadata.json
            "problem_dir": str,  # absolute path to problem folder
            "problem_id": str,   # echoed back for convenience
        }

    Raises:
        FileNotFoundError: if the problem directory (or one of its files)
            does not exist.
    """
    # FIX: annotation was Dict[str, any] — lowercase `any` is the builtin
    # function, not a type; typing.Any is the correct annotation.
    problem_dir = DATASET_ROOT / problem_id

    if not problem_dir.exists():
        raise FileNotFoundError(f"Problem directory not found: {problem_dir}")

    # Load buggy code.
    buggy_file = problem_dir / "buggy.py"
    code = buggy_file.read_text(encoding="utf-8")

    # Load metadata.
    metadata_file = problem_dir / "metadata.json"
    metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

    # Test file path. NOTE: this is absolute whenever DATASET_ROOT is
    # (the previous comment incorrectly called it relative).
    test_path = str(problem_dir / "test.py")

    return {
        "code": code,
        "tests": test_path,
        "metadata": metadata,
        "problem_dir": str(problem_dir),
        "problem_id": problem_id,
    }


def get_hardcoded_task(difficulty: str) -> Dict[str, Any]:
    """
    Get one of the three static competition tasks.

    Args:
        difficulty: "easy" | "medium" | "hard"

    Returns:
        Task dict with code, tests, metadata (see load_problem).

    Raises:
        ValueError: if difficulty is not one of the three approved values
    """
    if difficulty not in STATIC_TASKS:
        raise ValueError(
            f"Invalid difficulty '{difficulty}'. "
            f"Must be one of: {list(STATIC_TASKS.keys())}"
        )

    task_info = STATIC_TASKS[difficulty]
    problem_id = task_info["problem_id"]

    return load_problem(problem_id)


def get_random_tasks():
    """
    DEPRECATED: Use get_hardcoded_task() instead.
    Kept for backward compatibility.
    """
    import warnings
    warnings.warn(
        "get_random_tasks() is deprecated. Use get_hardcoded_task('easy'|'medium'|'hard')",
        DeprecationWarning,
        stacklevel=2
    )
    # Return a default (easy) task.
    return get_hardcoded_task("easy")
rl_code_fix_env/dataset/problem_1/buggy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
def reverse_words(text: str) -> str:
    """Return the words of *text* in reverse order, space-joined."""
    # BUG (intentional dataset fixture): split(" ") — unlike split() —
    # keeps empty items for repeated spaces, so runs of spaces survive
    # the round trip instead of being normalized.
    return " ".join(text.split(" ")[::-1])
rl_code_fix_env/dataset/problem_1/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "string-splitting",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_1/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_1.buggy import reverse_words


class TestReverseWords(unittest.TestCase):
    """Tests for problem_1: word-order reversal with space normalization."""

    # Single spaces produce no empty tokens, so this passes even with the
    # planted split(" ") bug.
    def test_simple(self):
        self.assertEqual(reverse_words("hello world"), "world hello")

    # NOTE(review): the method name suggests repeated spaces, but the literal
    # here shows single spaces — possibly collapsed when this page was
    # rendered; confirm against the raw file.
    def test_multiple_spaces(self):
        self.assertEqual(reverse_words("one two three"), "three two one")


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_10/buggy.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from src.dataset.problem_10.helpers import transpose


def rotate_90_clockwise(matrix: list[list[int]]) -> list[list[int]]:
    """Rotate matrix 90 degrees clockwise."""
    t = transpose(matrix)
    # BUG: this is counter-clockwise.
    # (Intentional dataset fixture: transpose followed by reversing the ROW
    # ORDER is the 90° counter-clockwise rotation; clockwise would instead
    # reverse the elements within each transposed row.)
    return t[::-1]
rl_code_fix_env/dataset/problem_10/helpers.py ADDED
@@ -0,0 +1,2 @@
 
 
 
def transpose(matrix: list[list[int]]) -> list[list[int]]:
    """Return the transpose of *matrix* (rows become columns)."""
    return list(map(list, zip(*matrix)))
rl_code_fix_env/dataset/problem_10/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "matrix-transformation",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_10/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_10.buggy import rotate_90_clockwise


class TestRotateMatrix(unittest.TestCase):
    """Tests for problem_10: 90-degree clockwise matrix rotation."""

    # Clockwise rotation of [[1, 2], [3, 4]] is [[3, 1], [4, 2]]; the buggy
    # transpose-then-reverse-rows implementation yields the CCW result, so
    # this fails until the agent fixes it.
    def test_2x2(self):
        matrix = [[1, 2], [3, 4]]
        self.assertEqual(rotate_90_clockwise(matrix), [[3, 1], [4, 2]])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_11/buggy.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def binary_search(nums: list[int], target: int) -> int:
    """Return index of target, or -1 if not found.

    Assumes *nums* is sorted ascending.
    """
    left, right = 0, len(nums) - 1

    # NOTE(review): `left < right` never tests the last remaining candidate
    # (when the window narrows to left == right), so e.g. a target at the
    # final index is missed. metadata.json labels this problem
    # "boundary-condition", so this appears to be the planted bug —
    # leave it in place; it is the fixture the agent must repair.
    while left < right:
        mid = (left + right) // 2
        if nums[mid] == target:
            return mid
        if nums[mid] < target:
            left = mid + 1
        else:
            right = mid - 1

    return -1
rl_code_fix_env/dataset/problem_11/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "boundary-condition",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_11/test.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_11.buggy import binary_search


class TestBinarySearch(unittest.TestCase):
    """Tests for problem_11: binary search boundary conditions."""

    def test_found_middle(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 5), 2)

    # Exercises the planted `left < right` bug: the last index is only
    # reachable when the final one-element window is examined.
    def test_found_last(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 7), 3)

    def test_not_found(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 4), -1)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_12/buggy.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
def parse_pairs(raw: str) -> dict[str, int]:
    """Parse strings like 'a=1,b=2' into a dict.

    Returns an empty dict for empty input. Propagates ValueError when a
    segment does not contain exactly one '=' or the value is not an
    integer literal.
    """
    result = {}
    if not raw:
        return result

    for segment in raw.split(","):
        key, value = segment.split("=")
        # BUG: does not strip whitespace around keys/values.
        # (int() tolerates surrounding spaces in the value, but keys keep
        # theirs, so 'x = 10' produces the key 'x ' rather than 'x'.)
        result[key] = int(value)
    return result
rl_code_fix_env/dataset/problem_12/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "string-normalization",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_12/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_12.buggy import parse_pairs


class TestParsePairs(unittest.TestCase):
    """Tests for problem_12: key=value parsing with whitespace tolerance."""

    def test_simple(self):
        self.assertEqual(parse_pairs("a=1,b=2"), {"a": 1, "b": 2})

    # Fails with the planted bug: keys retain surrounding spaces ('x ', ' y').
    def test_spaces(self):
        self.assertEqual(parse_pairs("x = 10, y = 20"), {"x": 10, "y": 20})


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_13/buggy.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_13.cache import LRUCache
2
+
3
+
4
def run_ops() -> tuple[int, int]:
    """Exercise a capacity-2 LRUCache and report two final lookups.

    Touches "a" via get() before inserting "c", so with correct recency
    tracking "b" is the eviction victim.
    """
    lru = LRUCache(2)
    for key, value in (("a", 1), ("b", 2)):
        lru.put(key, value)
    lru.get("a")  # read "a" so it becomes the most recently used entry
    lru.put("c", 3)  # capacity exceeded: least-recently-used entry is evicted
    return lru.get("a"), lru.get("b")
rl_code_fix_env/dataset/problem_13/cache.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+
3
+
4
class LRUCache:
    """Least-recently-used cache backed by an OrderedDict.

    The OrderedDict's insertion order doubles as the recency order: the
    first entry is the least recently used and is evicted on overflow.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity  # maximum number of entries retained
        self.store: OrderedDict[str, int] = OrderedDict()

    def get(self, key: str) -> int:
        """Return key's value and refresh its recency, or -1 if absent."""
        if key not in self.store:
            return -1
        # A read counts as a use: move the key to the most-recent end so
        # it is not the next eviction candidate.
        self.store.move_to_end(key)
        return self.store[key]

    def put(self, key: str, value: int) -> None:
        """Insert or update key; evict the least recently used on overflow."""
        if key in self.store:
            self.store.pop(key)  # re-inserting below refreshes recency
        self.store[key] = value
        if len(self.store) > self.capacity:
            # last=False pops from the oldest (least recently used) end.
            self.store.popitem(last=False)
rl_code_fix_env/dataset/problem_13/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "hard",
3
+ "bug_type": "state-logic",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_13/test.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_13.buggy import run_ops


class TestLRU(unittest.TestCase):
    """Spec: get() must refresh recency, so 'b' (not 'a') gets evicted."""

    def test_recency_update_on_get(self):
        a, b = run_ops()
        # "a" was read via get() before "c" was inserted, so it survives...
        self.assertEqual(a, 1)
        # ...and "b", now the least recently used key, was evicted.
        self.assertEqual(b, -1)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_14/buggy.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def fibonacci_recursive(n: int) -> int:
    """Return the nth Fibonacci number (fib(0) = 0, fib(1) = 1).

    Raises:
        ValueError: if n is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    # Base cases: fib(0) = 0 and fib(1) = 1, i.e. the identity on n.
    if n <= 1:
        return n
    return fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
rl_code_fix_env/dataset/problem_14/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "recursion-base-case",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_14/test.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_14.buggy import fibonacci_recursive


class TestFibonacciRecursive(unittest.TestCase):
    """Spec: zero-based Fibonacci, i.e. fib(0) = 0 and fib(1) = 1."""

    def test_base_cases(self):
        self.assertEqual(fibonacci_recursive(0), 0)
        self.assertEqual(fibonacci_recursive(1), 1)

    def test_n5(self):
        # Sequence: 0, 1, 1, 2, 3, 5 -> fib(5) is 5.
        self.assertEqual(fibonacci_recursive(5), 5)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_15/buggy.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def has_overlap(a: tuple[int, int], b: tuple[int, int]) -> bool:
    """Check if closed intervals [a0, a1] and [b0, b1] overlap.

    Closed intervals that merely touch at an endpoint (e.g. [1, 3] and
    [3, 7] sharing 3) count as overlapping, so both comparisons are
    inclusive.
    """
    return a[0] <= b[1] and b[0] <= a[1]
rl_code_fix_env/dataset/problem_15/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "boundary-condition",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_15/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_15.buggy import has_overlap


class TestIntervalOverlap(unittest.TestCase):
    """Spec for has_overlap on closed intervals."""

    def test_overlapping(self):
        self.assertTrue(has_overlap((1, 5), (4, 9)))

    def test_touching_endpoints(self):
        # Closed intervals sharing a single endpoint still overlap.
        self.assertTrue(has_overlap((1, 3), (3, 7)))


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_16/buggy.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_16.helpers import normalize_scores
2
+
3
+
4
def top_label(scores: dict[str, float]) -> str:
    """Return the label with the highest normalized probability.

    Args:
        scores: non-empty mapping of label to raw (unnormalized) score.

    Returns:
        The label whose normalized probability is largest; on ties, the
        earliest label in the dict's iteration order wins.
    """
    labels = list(scores.keys())
    probs = normalize_scores(list(scores.values()))
    # Select the index of the largest probability (was min(), which
    # returned the least likely label).
    idx = max(range(len(probs)), key=lambda i: probs[i])
    return labels[idx]
rl_code_fix_env/dataset/problem_16/helpers.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def normalize_scores(scores: list[float]) -> list[float]:
    """Rescale scores so they sum to 1; an empty input yields an empty list.

    Raises ZeroDivisionError when a non-empty input sums to zero.
    """
    denominator = sum(scores)
    return [value / denominator for value in scores]
rl_code_fix_env/dataset/problem_16/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "logic-error",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_16/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_16.buggy import top_label


class TestTopLabel(unittest.TestCase):
    """Spec: top_label picks the argmax, not the argmin, of the scores."""

    def test_select_highest(self):
        scores = {"cat": 0.2, "dog": 0.7, "bird": 0.1}
        self.assertEqual(top_label(scores), "dog")


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_17/buggy.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def dedupe_preserve_order(items: list[int]) -> list[int]:
    """Remove duplicates, keeping the first occurrence of each value in order.

    Args:
        items: values in arbitrary order, possibly with repeats.

    Returns:
        A new list with each distinct value exactly once, ordered by its
        first appearance in items.
    """
    seen: set[int] = set()
    out: list[int] = []
    for item in items:
        # Only the first sighting of a value is kept; later repeats are
        # skipped (the buggy version demoted values to their last position).
        if item not in seen:
            seen.add(item)
            out.append(item)
    return out
rl_code_fix_env/dataset/problem_17/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "logic-error",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_17/test.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_17.buggy import dedupe_preserve_order


class TestDedupe(unittest.TestCase):
    """Spec: deduplication keeps the FIRST occurrence of each value."""

    def test_order(self):
        # Repeated 1 and 2 must stay at their first positions.
        self.assertEqual(dedupe_preserve_order([1, 2, 1, 3, 2]), [1, 2, 3])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_18/buggy.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_18.math_utils import clamp
2
+
3
+
4
def moving_average(nums: list[int], window: int) -> list[float]:
    """Return the simple moving average over each full window of nums.

    The window is capped at len(nums), so a window larger than the data
    yields a single average over the whole list. Empty input yields [].

    Raises:
        ValueError: if window is not positive.
    """
    if window <= 0:
        raise ValueError("window must be positive")
    if not nums:
        return []

    # Cap the window at the data length (equivalent to the old
    # clamp(window, 1, len(nums)) since window >= 1 here).
    window = min(window, len(nums))
    # len(nums) - window + 1 start positions cover every full window,
    # including the final one (the buggy range stopped one short).
    return [
        sum(nums[i : i + window]) / window
        for i in range(len(nums) - window + 1)
    ]
rl_code_fix_env/dataset/problem_18/math_utils.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def clamp(value: int, low: int, high: int) -> int:
    """Confine value to [low, high], checking the lower bound first."""
    if value < low:
        return low
    return high if value > high else value
rl_code_fix_env/dataset/problem_18/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "off-by-one",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_18/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_18.buggy import moving_average


class TestMovingAverage(unittest.TestCase):
    """Spec for moving_average, including the final-window boundary."""

    def test_window_3(self):
        # Three full windows exist; the last one ([3, 4, 5]) must be included.
        self.assertEqual(moving_average([1, 2, 3, 4, 5], 3), [2.0, 3.0, 4.0])

    def test_window_larger_than_data(self):
        # Window is clamped to len(nums), giving one whole-list average.
        self.assertEqual(moving_average([2, 4], 5), [3.0])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_19/buggy.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def calculate_employee_bonus(employees: list[dict], metrics: dict) -> list[dict]:
    """
    Calculate employee bonuses from base salary, performance rating, and
    company-wide metrics.

    employees: list of dicts with 'id', 'role', 'base_salary' (number or
        numeric string), and 'rating' (1-5; missing/None treated as no bonus).
    metrics: dict with 'company_multiplier' (default 1) and
        'department_multipliers' mapping role -> multiplier (unknown roles
        fall back to 1.0).

    Returns a list of NEW dicts containing only 'id' and 'bonus'; the input
    list and its dicts are never mutated.
    """
    company_mult = metrics.get('company_multiplier', 1)
    dept_mults = metrics.get('department_multipliers', {})
    results = []

    for emp in employees:
        # Salaries sometimes arrive as numeric strings; coerce once up front
        # so the rate math below never multiplies a str.
        base = float(emp.get('base_salary') or 0)
        # Missing/None ratings count as 0 (no bonus) instead of crashing on
        # a None comparison.
        rating = emp.get('rating') or 0

        # Unknown roles use a neutral 1.0 multiplier rather than KeyError.
        role_mult = dept_mults.get(emp.get('role'), 1.0)

        # Bonus rate tiers: >3 earns 10%, exactly 3 earns 5%, below earns 0.
        if rating > 3:
            base_bonus = base * 0.1
        elif rating == 3:
            base_bonus = base * 0.05
        else:
            base_bonus = 0

        # Multipliers compound: the company multiplier SCALES the
        # role-adjusted bonus (the bug added it instead).
        total_bonus = base_bonus * role_mult * company_mult

        # Build a fresh dict so callers' employee records stay untouched.
        results.append({'id': emp.get('id'), 'bonus': total_bonus})

    return results
rl_code_fix_env/dataset/problem_19/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "hard",
3
+ "bug_type": "multiple",
4
+ "expected_steps": 4
5
+ }
rl_code_fix_env/dataset/problem_19/test.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from src.dataset.problem_19.buggy import calculate_employee_bonus

def test_calculate_employee_bonus():
    """End-to-end spec: salary coercion, role fallback, multiplier compounding,
    and purity (inputs must not be mutated)."""
    employees = [
        {'id': 1, 'role': 'engineering', 'base_salary': 100000, 'rating': 4},
        {'id': 2, 'role': 'sales', 'base_salary': '80000', 'rating': 3},
        {'id': 3, 'role': 'hr', 'base_salary': 60000, 'rating': 2},
        {'id': 4, 'role': 'unknown', 'base_salary': 50000, 'rating': 5}
    ]

    metrics = {
        'company_multiplier': 1.2,
        'department_multipliers': {
            'engineering': 1.5,
            'sales': 1.2,
            'hr': 1.0
        }
    }

    # Original dicts should not be modified
    orig_employees = [dict(e) for e in employees]

    results = calculate_employee_bonus(employees, metrics)

    # Check if original was modified
    assert employees == orig_employees, "Original list was mutated"

    # Check results format
    assert len(results) == 4
    for r in results:
        assert 'id' in r
        assert 'bonus' in r
        assert 'role' not in r  # Should only contain id and bonus

    # NOTE(review): the exact float equality below relies on these specific
    # values rounding cleanly under IEEE-754; pytest.approx would be more
    # robust if the fixture values ever change.
    # Check values
    # Emp 1: 100000 * 0.1 * 1.5 * 1.2 = 18000
    assert results[0]['bonus'] == 18000

    # Emp 2: 80000 * 0.05 * 1.2 * 1.2 = 5760 (string salary handling)
    assert results[1]['bonus'] == 5760

    # Emp 3: 0 bonus due to rating 2
    assert results[2]['bonus'] == 0

    # Emp 4: unknown role falls back to 1.0 multiplier
    # 50000 * 0.1 * 1.0 * 1.2 = 6000
    assert results[3]['bonus'] == 6000
rl_code_fix_env/dataset/problem_2/buggy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def is_palindrome(text: str) -> bool:
    """Check whether text is a palindrome, ignoring case and punctuation.

    Non-alphanumeric characters (spaces, commas, colons, ...) are dropped
    and letters are case-folded before comparison, so classic phrase
    palindromes like "A man, a plan, a canal: Panama" are recognized.
    An empty (or all-punctuation) string counts as a palindrome.
    """
    cleaned = "".join(ch.casefold() for ch in text if ch.isalnum())
    return cleaned == cleaned[::-1]