Spaces:
Running
Running
Viraaj Sawant commited on
Commit ·
8a4b89f
0
Parent(s):
Initial push of Mini RL Env
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +8 -0
- prompts.py +37 -0
- requirements.txt +26 -0
- rl_code_fix_env/.dockerignore +45 -0
- rl_code_fix_env/.gitignore +8 -0
- rl_code_fix_env/README.md +255 -0
- rl_code_fix_env/__init__.py +14 -0
- rl_code_fix_env/client.py +185 -0
- rl_code_fix_env/conftest.py +38 -0
- rl_code_fix_env/dataset/README.md +20 -0
- rl_code_fix_env/dataset/__init__.py +1 -0
- rl_code_fix_env/dataset/loader.py +111 -0
- rl_code_fix_env/dataset/problem_1/buggy.py +5 -0
- rl_code_fix_env/dataset/problem_1/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_1/test.py +14 -0
- rl_code_fix_env/dataset/problem_10/buggy.py +8 -0
- rl_code_fix_env/dataset/problem_10/helpers.py +2 -0
- rl_code_fix_env/dataset/problem_10/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_10/test.py +12 -0
- rl_code_fix_env/dataset/problem_11/buggy.py +14 -0
- rl_code_fix_env/dataset/problem_11/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_11/test.py +17 -0
- rl_code_fix_env/dataset/problem_12/buggy.py +11 -0
- rl_code_fix_env/dataset/problem_12/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_12/test.py +14 -0
- rl_code_fix_env/dataset/problem_13/buggy.py +10 -0
- rl_code_fix_env/dataset/problem_13/cache.py +20 -0
- rl_code_fix_env/dataset/problem_13/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_13/test.py +13 -0
- rl_code_fix_env/dataset/problem_14/buggy.py +6 -0
- rl_code_fix_env/dataset/problem_14/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_14/test.py +15 -0
- rl_code_fix_env/dataset/problem_15/buggy.py +4 -0
- rl_code_fix_env/dataset/problem_15/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_15/test.py +14 -0
- rl_code_fix_env/dataset/problem_16/buggy.py +10 -0
- rl_code_fix_env/dataset/problem_16/helpers.py +3 -0
- rl_code_fix_env/dataset/problem_16/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_16/test.py +12 -0
- rl_code_fix_env/dataset/problem_17/buggy.py +11 -0
- rl_code_fix_env/dataset/problem_17/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_17/test.py +11 -0
- rl_code_fix_env/dataset/problem_18/buggy.py +14 -0
- rl_code_fix_env/dataset/problem_18/math_utils.py +6 -0
- rl_code_fix_env/dataset/problem_18/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_18/test.py +14 -0
- rl_code_fix_env/dataset/problem_19/buggy.py +36 -0
- rl_code_fix_env/dataset/problem_19/metadata.json +5 -0
- rl_code_fix_env/dataset/problem_19/test.py +48 -0
- rl_code_fix_env/dataset/problem_2/buggy.py +5 -0
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pdf
|
| 2 |
+
venv/
|
| 3 |
+
.venv/
|
| 4 |
+
__pycache__/
|
| 5 |
+
.env
|
| 6 |
+
commands.md
|
| 7 |
+
logs.md
|
| 8 |
+
inference&docker.md
|
prompts.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LLM_SCORER_PROMPT = """
|
| 2 |
+
You are a reward model for an autonomous code bug-fixing agent trained with reinforcement learning.
|
| 3 |
+
Your scores are used directly as a learning signal — be precise, consistent, and strict.
|
| 4 |
+
|
| 5 |
+
You will receive:
|
| 6 |
+
- ORIGINAL: the buggy code before the agent's fix
|
| 7 |
+
- PATCHED: the code after the agent applied its patch
|
| 8 |
+
|
| 9 |
+
Evaluate the agent's fix on exactly three axes, each scored 0.0–10.0:
|
| 10 |
+
|
| 11 |
+
1. CORRECTNESS — Does the patch fix the bug(s) without introducing new ones?
|
| 12 |
+
Full marks only if the fix is semantically correct and complete.
|
| 13 |
+
Penalise partial fixes, over-patches, or fixes that mask rather than resolve the root cause.
|
| 14 |
+
|
| 15 |
+
2. MINIMALITY — Is the diff minimal? Penalise unnecessary refactors, renames, whitespace-only changes,
|
| 16 |
+
or reformatting of lines unrelated to the bug.
|
| 17 |
+
|
| 18 |
+
3. QUALITY — Is the patched code readable and idiomatic? Penalise: broken naming conventions,
|
| 19 |
+
added dead code, removed necessary comments, or degraded clarity vs. the original.
|
| 20 |
+
|
| 21 |
+
Respond ONLY with this JSON — no preamble, no trailing text:
|
| 22 |
+
{
|
| 23 |
+
"correctness": <float 0.0-10.0>,
|
| 24 |
+
"minimality": <float 0.0-10.0>,
|
| 25 |
+
"quality": <float 0.0-10.0>,
|
| 26 |
+
"reasoning": "<one concise sentence per axis, pipe-separated>"
|
| 27 |
+
}
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
USER_TEMPLATE ="""
|
| 32 |
+
ORIGINAL:
|
| 33 |
+
```python
|
| 34 |
+
{original_code}
|
| 35 |
+
```
|
| 36 |
+
Return only the JSON.
|
| 37 |
+
"""
|
requirements.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
datasets
|
| 3 |
+
gymnasium
|
| 4 |
+
rich
|
| 5 |
+
tqdm
|
| 6 |
+
matplotlib
|
| 7 |
+
seaborn
|
| 8 |
+
|
| 9 |
+
pandas
|
| 10 |
+
numpy
|
| 11 |
+
openenv-core
|
| 12 |
+
fastapi
|
| 13 |
+
requests
|
| 14 |
+
uvicorn
|
| 15 |
+
pydantic
|
| 16 |
+
streamlit
|
| 17 |
+
|
| 18 |
+
groq
|
| 19 |
+
langchain
|
| 20 |
+
langchain-core
|
| 21 |
+
huggingface_hub
|
| 22 |
+
|
| 23 |
+
loguru
|
| 24 |
+
pytest
|
| 25 |
+
unidiff
|
| 26 |
+
diff-match-patch
|
rl_code_fix_env/.dockerignore
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual environments (Windows/Linux/Mac)
|
| 2 |
+
.venv/
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
| 5 |
+
ENV/
|
| 6 |
+
.env
|
| 7 |
+
|
| 8 |
+
# Python cache & compiled
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.pyc
|
| 11 |
+
*.pyo
|
| 12 |
+
*.pyd
|
| 13 |
+
.Python
|
| 14 |
+
*.egg-info/
|
| 15 |
+
dist/
|
| 16 |
+
build/
|
| 17 |
+
*.egg
|
| 18 |
+
venv
|
| 19 |
+
.venv
|
| 20 |
+
|
| 21 |
+
# Testing & coverage
|
| 22 |
+
.pytest_cache/
|
| 23 |
+
.coverage
|
| 24 |
+
htmlcov/
|
| 25 |
+
|
| 26 |
+
# IDE & editor
|
| 27 |
+
.vscode/
|
| 28 |
+
.idea/
|
| 29 |
+
*.swp
|
| 30 |
+
*.swo
|
| 31 |
+
*~
|
| 32 |
+
.DS_Store
|
| 33 |
+
|
| 34 |
+
# Version control
|
| 35 |
+
.git/
|
| 36 |
+
.gitignore
|
| 37 |
+
|
| 38 |
+
# Build/cache
|
| 39 |
+
.mypy_cache/
|
| 40 |
+
*.log
|
| 41 |
+
|
| 42 |
+
# Docker
|
| 43 |
+
Dockerfile
|
| 44 |
+
.dockerignore
|
| 45 |
+
docker-compose.yml
|
rl_code_fix_env/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pdf
|
| 2 |
+
venv/
|
| 3 |
+
.venv/
|
| 4 |
+
__pycache__/
|
| 5 |
+
.env
|
| 6 |
+
*.pyc
|
| 7 |
+
*.egg
|
| 8 |
+
pytest-cache-files-*/
|
rl_code_fix_env/README.md
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Rl Code Fix Env Environment Server
|
| 3 |
+
emoji:
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Rl Code Fix Env Environment
|
| 15 |
+
|
| 16 |
+
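A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.

> **Note:** The examples in this README still follow the echo-environment template (`RlCodeFixEnv`, `RlCodeFixAction`, `echoed_message`). The package itself exports `CodeFixerAction` and `CodeFixerObservation` (see `__init__.py`), and the client class is `CodeFixerEnv` (see `client.py`), so adapt the names below accordingly.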
|
| 17 |
+
|
| 18 |
+
## Quick Start
|
| 19 |
+
|
| 20 |
+
The simplest way to use the Rl Code Fix Env environment is through the `RlCodeFixEnv` class:
|
| 21 |
+
|
| 22 |
+
```python
|
| 23 |
+
from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
# Create environment from Docker image
|
| 27 |
+
rl_code_fix_envenv = RlCodeFixEnv.from_docker_image("rl_code_fix_env-env:latest")
|
| 28 |
+
|
| 29 |
+
# Reset
|
| 30 |
+
result = rl_code_fix_envenv.reset()
|
| 31 |
+
print(f"Reset: {result.observation.echoed_message}")
|
| 32 |
+
|
| 33 |
+
# Send multiple messages
|
| 34 |
+
messages = ["Hello, World!", "Testing echo", "Final message"]
|
| 35 |
+
|
| 36 |
+
for msg in messages:
|
| 37 |
+
result = rl_code_fix_envenv.step(RlCodeFixAction(message=msg))
|
| 38 |
+
print(f"Sent: '{msg}'")
|
| 39 |
+
print(f" Echoed: '{result.observation.echoed_message}'")
|
| 40 |
+
print(f" Length: {result.observation.message_length}")
|
| 41 |
+
print(f" Reward: {result.reward}")
|
| 42 |
+
|
| 43 |
+
finally:
|
| 44 |
+
# Always clean up
|
| 45 |
+
rl_code_fix_envenv.close()
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
That's it! The `RlCodeFixEnv.from_docker_image()` method handles:
|
| 49 |
+
- Starting the Docker container
|
| 50 |
+
- Waiting for the server to be ready
|
| 51 |
+
- Connecting to the environment
|
| 52 |
+
- Container cleanup when you call `close()`
|
| 53 |
+
|
| 54 |
+
## Building the Docker Image
|
| 55 |
+
|
| 56 |
+
Before using the environment, you need to build the Docker image:
|
| 57 |
+
|
| 58 |
+
```bash
|
| 59 |
+
# From project root
|
| 60 |
+
docker build -t rl_code_fix_env-env:latest -f server/Dockerfile .
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Deploying to Hugging Face Spaces
|
| 64 |
+
|
| 65 |
+
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# From the environment directory (where openenv.yaml is located)
|
| 69 |
+
openenv push
|
| 70 |
+
|
| 71 |
+
# Or specify options
|
| 72 |
+
openenv push --namespace my-org --private
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
The `openenv push` command will:
|
| 76 |
+
1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
|
| 77 |
+
2. Prepare a custom build for Hugging Face Docker space (enables web interface)
|
| 78 |
+
3. Upload to Hugging Face (ensuring you're logged in)
|
| 79 |
+
|
| 80 |
+
### Prerequisites
|
| 81 |
+
|
| 82 |
+
- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
|
| 83 |
+
|
| 84 |
+
### Options
|
| 85 |
+
|
| 86 |
+
- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
|
| 87 |
+
- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
|
| 88 |
+
- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
|
| 89 |
+
- `--private`: Deploy the space as private (default: public)
|
| 90 |
+
|
| 91 |
+
### Examples
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 95 |
+
openenv push
|
| 96 |
+
|
| 97 |
+
# Push to a specific repository
|
| 98 |
+
openenv push --repo-id my-org/my-env
|
| 99 |
+
|
| 100 |
+
# Push with a custom base image
|
| 101 |
+
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 102 |
+
|
| 103 |
+
# Push as a private space
|
| 104 |
+
openenv push --private
|
| 105 |
+
|
| 106 |
+
# Combine options
|
| 107 |
+
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
After deployment, your space will be available at:
|
| 111 |
+
`https://huggingface.co/spaces/<repo-id>`
|
| 112 |
+
|
| 113 |
+
The deployed space includes:
|
| 114 |
+
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 115 |
+
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 116 |
+
- **Health Check** at `/health` - Container health monitoring
|
| 117 |
+
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 118 |
+
|
| 119 |
+
## Environment Details
|
| 120 |
+
|
| 121 |
+
### Action
|
| 122 |
+
**RlCodeFixAction**: Contains a single field
|
| 123 |
+
- `message` (str) - The message to echo back
|
| 124 |
+
|
| 125 |
+
### Observation
|
| 126 |
+
**RlCodeFixObservation**: Contains the echo response and metadata
|
| 127 |
+
- `echoed_message` (str) - The message echoed back
|
| 128 |
+
- `message_length` (int) - Length of the message
|
| 129 |
+
- `reward` (float) - Reward based on message length (length × 0.1)
|
| 130 |
+
- `done` (bool) - Always False for echo environment
|
| 131 |
+
- `metadata` (dict) - Additional info like step count
|
| 132 |
+
|
| 133 |
+
### Reward
|
| 134 |
+
The reward is calculated as: `message_length × 0.1`
|
| 135 |
+
- "Hi" → reward: 0.2
|
| 136 |
+
- "Hello, World!" → reward: 1.3
|
| 137 |
+
- Empty message → reward: 0.0
|
| 138 |
+
|
| 139 |
+
## Advanced Usage
|
| 140 |
+
|
| 141 |
+
### Connecting to an Existing Server
|
| 142 |
+
|
| 143 |
+
If you already have a Rl Code Fix Env environment server running, you can connect directly:
|
| 144 |
+
|
| 145 |
+
```python
|
| 146 |
+
from rl_code_fix_env import RlCodeFixEnv
|
| 147 |
+
|
| 148 |
+
# Connect to existing server
|
| 149 |
+
rl_code_fix_envenv = RlCodeFixEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 150 |
+
|
| 151 |
+
# Use as normal
|
| 152 |
+
result = rl_code_fix_envenv.reset()
|
| 153 |
+
result = rl_code_fix_envenv.step(RlCodeFixAction(message="Hello!"))
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
Note: When connecting to an existing server, `rl_code_fix_envenv.close()` will NOT stop the server.
|
| 157 |
+
|
| 158 |
+
### Using the Context Manager
|
| 159 |
+
|
| 160 |
+
The client supports context manager usage for automatic connection management:
|
| 161 |
+
|
| 162 |
+
```python
|
| 163 |
+
from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
|
| 164 |
+
|
| 165 |
+
# Connect with context manager (auto-connects and closes)
|
| 166 |
+
with RlCodeFixEnv(base_url="http://localhost:8000") as env:
|
| 167 |
+
result = env.reset()
|
| 168 |
+
print(f"Reset: {result.observation.echoed_message}")
|
| 169 |
+
# Multiple steps with low latency
|
| 170 |
+
for msg in ["Hello", "World", "!"]:
|
| 171 |
+
result = env.step(RlCodeFixAction(message=msg))
|
| 172 |
+
print(f"Echoed: {result.observation.echoed_message}")
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
The client uses WebSocket connections for:
|
| 176 |
+
- **Lower latency**: No HTTP connection overhead per request
|
| 177 |
+
- **Persistent session**: Server maintains your environment state
|
| 178 |
+
- **Efficient for episodes**: Better for many sequential steps
|
| 179 |
+
|
| 180 |
+
### Concurrent WebSocket Sessions
|
| 181 |
+
|
| 182 |
+
The server supports multiple concurrent WebSocket connections. To enable this,
|
| 183 |
+
modify `server/app.py` to use factory mode:
|
| 184 |
+
|
| 185 |
+
```python
|
| 186 |
+
# In server/app.py - use factory mode for concurrent sessions
|
| 187 |
+
app = create_app(
|
| 188 |
+
RlCodeFixEnvironment, # Pass class, not instance
|
| 189 |
+
RlCodeFixAction,
|
| 190 |
+
RlCodeFixObservation,
|
| 191 |
+
max_concurrent_envs=4, # Allow 4 concurrent sessions
|
| 192 |
+
)
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
Then multiple clients can connect simultaneously:
|
| 196 |
+
|
| 197 |
+
```python
|
| 198 |
+
from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
|
| 199 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 200 |
+
|
| 201 |
+
def run_episode(client_id: int):
|
| 202 |
+
with RlCodeFixEnv(base_url="http://localhost:8000") as env:
|
| 203 |
+
result = env.reset()
|
| 204 |
+
for i in range(10):
|
| 205 |
+
result = env.step(RlCodeFixAction(message=f"Client {client_id}, step {i}"))
|
| 206 |
+
return client_id, result.observation.message_length
|
| 207 |
+
|
| 208 |
+
# Run 4 episodes concurrently
|
| 209 |
+
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 210 |
+
results = list(executor.map(run_episode, range(4)))
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
## Development & Testing
|
| 214 |
+
|
| 215 |
+
### Direct Environment Testing
|
| 216 |
+
|
| 217 |
+
Test the environment logic directly without starting the HTTP server:
|
| 218 |
+
|
| 219 |
+
```bash
|
| 220 |
+
# From the environment root directory (the one that contains server/)
|
| 221 |
+
python3 server/rl_code_fix_env_environment.py
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
This verifies that:
|
| 225 |
+
- Environment resets correctly
|
| 226 |
+
- Step executes actions properly
|
| 227 |
+
- State tracking works
|
| 228 |
+
- Rewards are calculated correctly
|
| 229 |
+
|
| 230 |
+
### Running Locally
|
| 231 |
+
|
| 232 |
+
Run the server locally for development:
|
| 233 |
+
|
| 234 |
+
```bash
|
| 235 |
+
uvicorn server.app:app --reload
|
| 236 |
+
```
|
| 237 |
+
|
| 238 |
+
## Project Structure
|
| 239 |
+
|
| 240 |
+
```
|
| 241 |
+
rl_code_fix_env/
|
| 242 |
+
.dockerignore # Docker build exclusions
|
| 243 |
+
__init__.py # Module exports
|
| 244 |
+
README.md # This file
|
| 245 |
+
openenv.yaml # OpenEnv manifest
|
| 246 |
+
pyproject.toml # Project metadata and dependencies
|
| 247 |
+
uv.lock # Locked dependencies (generated)
|
| 248 |
+
client.py # RlCodeFixEnv client
|
| 249 |
+
models.py # Action and Observation models
|
| 250 |
+
server/
|
| 251 |
+
__init__.py # Server module exports
|
| 252 |
+
rl_code_fix_env_environment.py # Core environment logic
|
| 253 |
+
app.py # FastAPI application (HTTP + WebSocket endpoints)
|
| 254 |
+
Dockerfile # Container image definition
|
| 255 |
+
```
|
rl_code_fix_env/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Rl Code Fix Env Environment."""
|
| 8 |
+
|
| 9 |
+
from .models import CodeFixerAction, CodeFixerObservation
|
| 10 |
+
|
| 11 |
+
# Public API of the package: the action and observation models imported from
# `.models` above.
__all__ = [
    "CodeFixerAction",
    "CodeFixerObservation",
]
|
rl_code_fix_env/client.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Code Fixer Environment Client."""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import inspect
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Dict
|
| 13 |
+
|
| 14 |
+
from openenv.core import EnvClient
|
| 15 |
+
from openenv.core.client_types import StepResult
|
| 16 |
+
from openenv.core.env_server.types import State
|
| 17 |
+
|
| 18 |
+
from rl_code_fix_env.models import CodeFixerAction, CodeFixerObservation
|
| 19 |
+
|
| 20 |
+
log = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
class CodeFixerEnv(
    EnvClient[CodeFixerAction, CodeFixerObservation, State]
):
    """
    Client for the Code Fixer Environment.

    This client maintains a persistent WebSocket connection to the environment server,
    enabling efficient multi-step interactions with lower latency.
    Each client instance has its own dedicated environment session on the server.

    Results returned by the base class that are coroutines are driven to
    completion on an event loop owned by this instance, so ``reset()``,
    ``step()`` and ``close()`` can be called from synchronous code.
    ``reset()`` and ``step()`` retry once after a reconnectable WebSocket error.

    Example:
        >>> # Connect to a running server
        >>> with CodeFixerEnv(base_url="http://localhost:8000") as client:
        ...     result = client.reset()
        ...     print(result.observation.code)
        ...
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ...     print(result.observation.test_passed)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = CodeFixerEnv.from_docker_image("code_fixer-env:latest")
        >>> try:
        ...     result = client.reset()
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ... finally:
        ...     client.close()
    """

    def __init__(self, *args, **kwargs):
        """
        Create the client and its dedicated event loop.

        All arguments are forwarded unchanged to the base class and are kept
        so that ``_reconnect()`` can re-initialise the base class later.
        """
        super().__init__(*args, **kwargs)
        self._loop = asyncio.new_event_loop()
        # Store init args for reconnection
        self._init_args = args
        self._init_kwargs = kwargs

    def _run_sync(self, result):
        """Run coroutine results on this client's dedicated event loop.

        Non-coroutine values are returned unchanged, so this works whether the
        base-class method is synchronous or ``async``.
        """
        if inspect.iscoroutine(result):
            return self._loop.run_until_complete(result)
        return result

    def _reconnect(self) -> None:
        """
        Tear down the dead event loop and WebSocket connection, then
        re-initialise so the next call works cleanly.

        Called automatically by reset() and step() when a 1011 / timeout
        error is detected after an idle period.
        """
        log.warning("[CodeFixerEnv] WebSocket timed out - reconnecting...")
        # Best-effort close of the stale connection: it is expected to fail
        # when the socket is already dead, so the error is recorded at debug
        # level instead of being raised (or silently dropped).
        try:
            self._run_sync(super().close())
        except Exception as exc:
            log.debug(
                "[CodeFixerEnv] Ignoring error while closing stale connection: %r",
                exc,
            )
        if not self._loop.is_closed():
            self._loop.close()

        # Re-initialise: fresh loop + fresh base-class state
        self._loop = asyncio.new_event_loop()
        super().__init__(*self._init_args, **self._init_kwargs)
        log.warning("[CodeFixerEnv] Reconnected successfully.")

    @staticmethod
    def _is_reconnectable_ws_error(exc: Exception) -> bool:
        """
        Return True when ``str(exc)`` looks like a dropped WebSocket.

        The check is a case-insensitive substring match against a fixed list
        of markers (close codes 1011/1006, keepalive/timeout wording, and
        generic "closed"/"websocket" wording), so it is deliberately broad.
        """
        err = str(exc).lower()
        reconnect_markers = (
            "1011",
            "1006",
            "keepalive",
            "timed out",
            "closed",
            "close frame",
            "connection closed",
            "connectionclosed",
            "websocket",
        )
        return any(marker in err for marker in reconnect_markers)

    def _call_with_reconnect(self, call):
        """
        Run ``call()`` and retry it exactly once after a reconnectable error.

        Args:
            call: Zero-argument callable that invokes the base-class method;
                it is invoked again after ``_reconnect()`` for the retry.

        Raises:
            Exception: the original error when it is not reconnectable, or
                whatever the single retry raises.
        """
        try:
            return self._run_sync(call())
        except Exception as exc:
            if not self._is_reconnectable_ws_error(exc):
                raise
            self._reconnect()
            return self._run_sync(call())  # one retry

    def reset(self):
        """Reset the environment; auto-reconnects if the WebSocket died."""
        # Bind the base method here: zero-argument super() is not usable
        # inside a nested callable.
        base_reset = super().reset
        return self._call_with_reconnect(base_reset)

    def step(self, action: CodeFixerAction):
        """Execute a step; auto-reconnects if the WebSocket died."""
        base_step = super().step
        return self._call_with_reconnect(lambda: base_step(action))

    def close(self):
        """Close client resources and the dedicated event loop safely."""
        try:
            self._run_sync(super().close())
        finally:
            # Always release the loop, even when the base close() failed.
            if not self._loop.is_closed():
                self._loop.close()

    def _step_payload(self, action: CodeFixerAction) -> Dict:
        """
        Convert CodeFixerAction to JSON payload for step message.

        Args:
            action: CodeFixerAction instance

        Returns:
            Dictionary representation suitable for JSON encoding
        """
        return {
            "type": action.type,
            "payload": action.payload,
        }

    def _parse_result(self, payload: Dict) -> StepResult[CodeFixerObservation]:
        """
        Parse server response into StepResult[CodeFixerObservation].

        Missing observation fields fall back to defaults; ``done`` and
        ``reward`` inside the observation fall back to the top-level values.
        A JSON ``null`` for ``observation`` or ``test_score`` is treated the
        same as a missing key.

        Args:
            payload: JSON response data from server

        Returns:
            StepResult with CodeFixerObservation
        """
        # `or {}` also covers an explicit null observation.
        obs_data = payload.get("observation") or {}
        raw_score = obs_data.get("test_score")
        observation = CodeFixerObservation(
            code=obs_data.get("code", ""),
            logs=obs_data.get("logs"),
            # float(None) would raise TypeError, so map null to 0.0.
            test_score=float(raw_score) if raw_score is not None else 0.0,
            total_tests=obs_data.get("total_tests", 1),
            steps=obs_data.get("steps", 0),
            done=obs_data.get("done", payload.get("done", False)),
            reward=obs_data.get("reward", payload.get("reward")),
        )

        return StepResult(
            observation=observation,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict) -> State:
        """
        Parse server response into State object.

        Args:
            payload: JSON response from state request

        Returns:
            State object with episode_id and step_count
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
|
rl_code_fix_env/conftest.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
conftest.py - repo-root pytest configuration.
|
| 3 |
+
|
| 4 |
+
Registers `src.dataset` as a sys.modules alias for `dataset` so that all
|
| 5 |
+
problem test files using `from src.dataset.problem_X.buggy import ...`
|
| 6 |
+
resolve correctly without needing to rename 24 test files.
|
| 7 |
+
|
| 8 |
+
The physical layout is:
|
| 9 |
+
<repo_root>/dataset/problem_X/buggy.py real files
|
| 10 |
+
<repo_root>/src/ has environment/, reward/, etc.
|
| 11 |
+
but NO dataset/ subfolder
|
| 12 |
+
|
| 13 |
+
With PYTHONPATH=<repo_root>:
|
| 14 |
+
import dataset.problem_1.buggy works natively
|
| 15 |
+
import src.dataset.problem_1.buggy -> would fail; fixed here via alias
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import sys
import importlib
from pathlib import Path

# Put the directory containing this conftest on sys.path so that the
# top-level `dataset` package can be imported below.
_REPO_ROOT = str(Path(__file__).parent)
if _REPO_ROOT not in sys.path:
    sys.path.insert(0, _REPO_ROOT)

# Must stay after the sys.path tweak above.
import dataset as _real_dataset

# setdefault: only register the alias when nothing is registered under
# "src.dataset" yet.
sys.modules.setdefault("src.dataset", _real_dataset)

import pkgutil
# Alias every module that pkgutil discovers directly under the dataset
# package as `src.dataset.<name>` as well.
for _pkg in pkgutil.iter_modules(_real_dataset.__path__):
    _full = f"dataset.{_pkg.name}"
    _alias = f"src.dataset.{_pkg.name}"
    try:
        _mod = importlib.import_module(_full)
        sys.modules.setdefault(_alias, _mod)
    except Exception:
        # Best-effort: a module that fails to import is left unaliased.
        pass
|
rl_code_fix_env/dataset/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Buggy Training Dataset
|
| 2 |
+
|
| 3 |
+
This dataset is organized as:
|
| 4 |
+
|
| 5 |
+
- `problem_x/buggy.py`: intentionally buggy implementation
|
| 6 |
+
- `problem_x/test.py`: correctness tests that should fail before fixes
|
| 7 |
+
- optional extra modules (`helpers.py`, `cache.py`, etc.) to support multi-file bug fixing
|
| 8 |
+
|
| 9 |
+
Current problems: `problem_1` to `problem_18`.
|
| 10 |
+
|
| 11 |
+
Bug patterns included:
|
| 12 |
+
- off-by-one errors
|
| 13 |
+
- boundary condition mistakes
|
| 14 |
+
- incorrect sorting direction
|
| 15 |
+
- exception handling mistakes
|
| 16 |
+
- state/recency bugs in cache logic
|
| 17 |
+
- recursive base-case bugs
|
| 18 |
+
- parsing and whitespace normalization issues
|
| 19 |
+
- order-preservation regressions
|
| 20 |
+
- matrix transformation direction errors
|
rl_code_fix_env/dataset/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Dataset loading modules."""
|
rl_code_fix_env/dataset/loader.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Load static, competition-approved tasks."""
|
| 2 |
+
|
| 3 |
+
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
|
| 7 |
+
|
| 8 |
+
# Get the dataset root (same folder as this file)
DATASET_ROOT = Path(__file__).parent

# Hardcoded competition tasks, keyed by difficulty tier: easy -> medium -> hard.
# Each entry's "problem_id" is the folder name that load_problem() resolves
# under DATASET_ROOT.
STATIC_TASKS = {
    "easy": {
        "problem_id": "problem_1",
        "difficulty": "easy",
        "description": "String reversal with space normalization",
    },
    "medium": {
        "problem_id": "problem_10",
        "difficulty": "medium",
        "description": "Matrix 90 clockwise rotation",
    },
    "hard": {
        "problem_id": "problem_13",
        "difficulty": "hard",
        "description": "LRU cache with correct eviction policy",
    },
}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_problem(problem_id: str) -> Dict[str, Any]:
    """
    Load a single problem from disk.

    Args:
        problem_id: name of a problem folder directly under DATASET_ROOT,
            e.g., "problem_1", "problem_10", "problem_13"

    Returns:
        {
            "code": str,         # buggy.py content
            "tests": str,        # path to test.py inside the problem folder
                                 # (the path only; the file is not read or checked)
            "metadata": dict,    # parsed metadata.json
            "problem_dir": str,  # path to the problem folder
            "problem_id": str,   # the problem_id that was passed in
        }

    Raises:
        FileNotFoundError: if the problem folder is missing (or is not a
            directory), or if buggy.py / metadata.json is missing inside it
        json.JSONDecodeError: if metadata.json is not valid JSON
    """
    problem_dir = DATASET_ROOT / problem_id

    # is_dir() instead of exists(): a regular file that happens to carry the
    # problem's name must be reported as "not found" here, rather than failing
    # later with an unrelated NotADirectoryError while reading buggy.py.
    if not problem_dir.is_dir():
        raise FileNotFoundError(f"Problem directory not found: {problem_dir}")

    # Load buggy code
    buggy_file = problem_dir / "buggy.py"
    code = buggy_file.read_text(encoding="utf-8")

    # Load metadata
    metadata_file = problem_dir / "metadata.json"
    metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

    # Test file path (built from problem_dir, so it is as absolute as DATASET_ROOT)
    test_path = str(problem_dir / "test.py")

    return {
        "code": code,
        "tests": test_path,
        "metadata": metadata,
        "problem_dir": str(problem_dir),
        "problem_id": problem_id,
    }
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def get_hardcoded_task(difficulty: str) -> Dict[str, Any]:
    """
    Get one of the static competition tasks registered in STATIC_TASKS.

    Args:
        difficulty: a key of STATIC_TASKS ("easy" | "medium" | "hard")

    Returns:
        The dict produced by load_problem() for the problem mapped to this
        difficulty (code, tests, metadata, problem_dir, problem_id)

    Raises:
        ValueError: if difficulty is not a key of STATIC_TASKS
    """
    if difficulty not in STATIC_TASKS:
        raise ValueError(
            f"Invalid difficulty '{difficulty}'. "
            f"Must be one of: {list(STATIC_TASKS.keys())}"
        )

    # Only the problem_id of the registry entry is used; the entry's own
    # "difficulty" and "description" fields are not part of the returned task.
    return load_problem(STATIC_TASKS[difficulty]["problem_id"])
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def get_random_tasks():
    """
    DEPRECATED shim kept for backward compatibility.

    Emits a DeprecationWarning attributed to the caller, then forwards to
    get_hardcoded_task("easy"). Nothing is chosen at random.
    """
    from warnings import warn

    notice = (
        "get_random_tasks() is deprecated. Use get_hardcoded_task('easy'|'medium'|'hard')"
    )
    # stacklevel=2 points the warning at whoever called this shim.
    warn(notice, DeprecationWarning, stacklevel=2)

    # The easy task is the fixed default.
    return get_hardcoded_task("easy")
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
|
rl_code_fix_env/dataset/problem_1/buggy.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def reverse_words(text: str) -> str:
    """Give back the space-separated pieces of ``text`` in opposite order."""
    # BUG: split(" ") keeps empty items for repeated spaces.
    pieces = text.split(" ")
    pieces.reverse()
    return " ".join(pieces)
|
rl_code_fix_env/dataset/problem_1/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "easy",
|
| 3 |
+
"bug_type": "string-splitting",
|
| 4 |
+
"expected_steps": 1
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_1/test.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_1.buggy import reverse_words
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestReverseWords(unittest.TestCase):
    """Checks that reverse_words returns the words of a sentence in reverse order."""

    def test_simple(self):
        # Two words separated by one space.
        self.assertEqual(reverse_words("hello world"), "world hello")

    def test_multiple_spaces(self):
        # NOTE(review): the test name implies runs of spaces in the input, but
        # as written here the words are separated by single spaces -- confirm
        # the literal against the checked-in file.
        self.assertEqual(reverse_words("one two three"), "three two one")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
if __name__ == "__main__":
|
| 14 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_10/buggy.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.dataset.problem_10.helpers import transpose
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def rotate_90_clockwise(matrix: list[list[int]]) -> list[list[int]]:
    """Turn ``matrix`` a quarter turn clockwise (transpose, then reorder)."""
    flipped = transpose(matrix)
    # BUG: this is counter-clockwise.
    return list(reversed(flipped))
|
rl_code_fix_env/dataset/problem_10/helpers.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def transpose(matrix: list[list[int]]) -> list[list[int]]:
    """Swap rows and columns; zip() stops at the shortest row of ragged input."""
    return list(map(list, zip(*matrix)))
|
rl_code_fix_env/dataset/problem_10/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "medium",
|
| 3 |
+
"bug_type": "matrix-transformation",
|
| 4 |
+
"expected_steps": 1
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_10/test.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_10.buggy import rotate_90_clockwise
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestRotateMatrix(unittest.TestCase):
    """Checks rotate_90_clockwise on a 2x2 matrix."""

    def test_2x2(self):
        # Clockwise quarter turn: the left column, read bottom-up, becomes the top row.
        matrix = [[1, 2], [3, 4]]
        self.assertEqual(rotate_90_clockwise(matrix), [[3, 1], [4, 2]])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
if __name__ == "__main__":
|
| 12 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_11/buggy.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def binary_search(nums: list[int], target: int) -> int:
    """Bisect sorted ``nums`` looking for ``target``; yields an index or -1."""
    lo = 0
    hi = len(nums) - 1

    while lo < hi:
        middle = (lo + hi) // 2
        probe = nums[middle]
        if probe == target:
            return middle
        if probe < target:
            lo = middle + 1
        else:
            hi = middle - 1

    return -1
|
rl_code_fix_env/dataset/problem_11/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "medium",
|
| 3 |
+
"bug_type": "boundary-condition",
|
| 4 |
+
"expected_steps": 2
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_11/test.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_11.buggy import binary_search
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestBinarySearch(unittest.TestCase):
    """Checks binary_search for a hit inside the list, a hit at the end, and a miss."""

    def test_found_middle(self):
        # Target sits at index 2 of a four-element list.
        self.assertEqual(binary_search([1, 3, 5, 7], 5), 2)

    def test_found_last(self):
        # Target is the final element.
        self.assertEqual(binary_search([1, 3, 5, 7], 7), 3)

    def test_not_found(self):
        # Target falls between two stored values.
        self.assertEqual(binary_search([1, 3, 5, 7], 4), -1)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
if __name__ == "__main__":
|
| 17 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_12/buggy.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def parse_pairs(raw: str) -> dict[str, int]:
    """Turn text such as 'a=1,b=2' into a mapping from key to integer value."""
    parsed = {}
    if raw:
        for chunk in raw.split(","):
            name, number = chunk.split("=")
            # BUG: does not strip whitespace around keys/values.
            parsed[name] = int(number)
    return parsed
|
rl_code_fix_env/dataset/problem_12/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "easy",
|
| 3 |
+
"bug_type": "string-normalization",
|
| 4 |
+
"expected_steps": 2
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_12/test.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_12.buggy import parse_pairs
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestParsePairs(unittest.TestCase):
    """Checks parse_pairs on compact input and on input padded with spaces."""

    def test_simple(self):
        # No whitespace anywhere in the input.
        self.assertEqual(parse_pairs("a=1,b=2"), {"a": 1, "b": 2})

    def test_spaces(self):
        # Spaces around '=' and after ',' must not end up in the keys.
        self.assertEqual(parse_pairs("x = 10, y = 20"), {"x": 10, "y": 20})
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
if __name__ == "__main__":
|
| 14 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_13/buggy.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.dataset.problem_13.cache import LRUCache
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def run_ops() -> tuple[int, int]:
    """Replay put a, put b, get a, put c on a capacity-2 cache; report get a, get b."""
    lru = LRUCache(2)
    for key, value in (("a", 1), ("b", 2)):
        lru.put(key, value)
    lru.get("a")  # read "a" before the third key is inserted; result unused
    lru.put("c", 3)
    value_a = lru.get("a")
    value_b = lru.get("b")
    return value_a, value_b
|
rl_code_fix_env/dataset/problem_13/cache.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import OrderedDict
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class LRUCache:
    """Bounded str -> int store on an OrderedDict; the oldest entry is dropped on overflow."""

    def __init__(self, capacity: int):
        self.capacity = capacity
        # Insertion order of this mapping is the eviction order.
        self.store: OrderedDict[str, int] = OrderedDict()

    def get(self, key: str) -> int:
        """Return the stored value for ``key``, or -1 when it is absent."""
        # BUG: does not refresh recency when key is accessed.
        return self.store.get(key, -1)

    def put(self, key: str, value: int) -> None:
        """Store ``value`` under ``key`` as the newest entry, evicting the oldest if over capacity."""
        # Drop any existing entry first so the key is re-inserted at the end.
        self.store.pop(key, None)
        self.store[key] = value
        if len(self.store) > self.capacity:
            self.store.popitem(last=False)
|
rl_code_fix_env/dataset/problem_13/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "hard",
|
| 3 |
+
"bug_type": "state-logic",
|
| 4 |
+
"expected_steps": 2
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_13/test.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_13.buggy import run_ops
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestLRU(unittest.TestCase):
    """Checks the pair returned by run_ops()."""

    def test_recency_update_on_get(self):
        # Expected: "a" is still cached (value 1) and "b" is gone (-1).
        a, b = run_ops()
        self.assertEqual(a, 1)
        self.assertEqual(b, -1)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
if __name__ == "__main__":
|
| 13 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_14/buggy.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def fibonacci_recursive(n: int) -> int:
    """Compute the nth Fibonacci number by plain double recursion."""
    # BUG: wrong base case for n == 0.
    return 1 if n <= 1 else fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
|
rl_code_fix_env/dataset/problem_14/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "easy",
|
| 3 |
+
"bug_type": "recursion-base-case",
|
| 4 |
+
"expected_steps": 2
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_14/test.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_14.buggy import fibonacci_recursive
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestFibonacciRecursive(unittest.TestCase):
    """Checks fibonacci_recursive with the sequence starting 0, 1, 1, 2, 3, 5."""

    def test_base_cases(self):
        # F(0) = 0 and F(1) = 1.
        self.assertEqual(fibonacci_recursive(0), 0)
        self.assertEqual(fibonacci_recursive(1), 1)

    def test_n5(self):
        # F(5) = 5.
        self.assertEqual(fibonacci_recursive(5), 5)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
if __name__ == "__main__":
|
| 15 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_15/buggy.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def has_overlap(a: tuple[int, int], b: tuple[int, int]) -> bool:
    """Tell whether closed intervals [a0, a1] and [b0, b1] share any point."""
    # BUG: uses strict inequalities, missing touching-boundary overlap.
    if a[0] < b[1]:
        return b[0] < a[1]
    return False
|
rl_code_fix_env/dataset/problem_15/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "medium",
|
| 3 |
+
"bug_type": "boundary-condition",
|
| 4 |
+
"expected_steps": 1
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_15/test.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_15.buggy import has_overlap
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestIntervalOverlap(unittest.TestCase):
    """Checks has_overlap for a proper overlap and for intervals sharing one endpoint."""

    def test_overlapping(self):
        # [1, 5] and [4, 9] share the range [4, 5].
        self.assertTrue(has_overlap((1, 5), (4, 9)))

    def test_touching_endpoints(self):
        # Closed intervals [1, 3] and [3, 7] share the single point 3.
        self.assertTrue(has_overlap((1, 3), (3, 7)))
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
if __name__ == "__main__":
|
| 14 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_16/buggy.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.dataset.problem_16.helpers import normalize_scores
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def top_label(scores: dict[str, float]) -> str:
    """Pick the label whose normalized score wins the comparison below."""
    names = list(scores)
    weights = normalize_scores(list(scores.values()))
    # BUG: chooses min instead of max.
    position, _ = min(enumerate(weights), key=lambda pair: pair[1])
    return names[position]
|
rl_code_fix_env/dataset/problem_16/helpers.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def normalize_scores(scores: list[float]) -> list[float]:
    """Divide every score by the sum of all scores."""
    denominator = sum(scores)
    normalized = []
    for score in scores:
        normalized.append(score / denominator)
    return normalized
|
rl_code_fix_env/dataset/problem_16/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "easy",
|
| 3 |
+
"bug_type": "logic-error",
|
| 4 |
+
"expected_steps": 1
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_16/test.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_16.buggy import top_label
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestTopLabel(unittest.TestCase):
    """Checks that top_label returns the label with the largest score."""

    def test_select_highest(self):
        # "dog" carries the largest score of the three.
        scores = {"cat": 0.2, "dog": 0.7, "bird": 0.1}
        self.assertEqual(top_label(scores), "dog")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
if __name__ == "__main__":
|
| 12 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_17/buggy.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def dedupe_preserve_order(items: list[int]) -> list[int]:
    """Drop repeated values from ``items``, keeping one copy of each."""
    seen = set()
    kept = []
    for item in items:
        # BUG: keeps last occurrence logic effectively by replacing list.
        if item not in seen:
            seen.add(item)
        else:
            kept = [earlier for earlier in kept if earlier != item]
        kept.append(item)
    return kept
|
rl_code_fix_env/dataset/problem_17/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "medium",
|
| 3 |
+
"bug_type": "logic-error",
|
| 4 |
+
"expected_steps": 2
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_17/test.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_17.buggy import dedupe_preserve_order
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestDedupe(unittest.TestCase):
    """Checks that dedupe_preserve_order keeps values in first-occurrence order."""

    def test_order(self):
        # First occurrences appear in the order 1, 2, 3.
        self.assertEqual(dedupe_preserve_order([1, 2, 1, 3, 2]), [1, 2, 3])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
if __name__ == "__main__":
|
| 11 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_18/buggy.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.dataset.problem_18.math_utils import clamp
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def moving_average(nums: list[int], window: int) -> list[float]:
    """Average consecutive runs of ``nums``; the window is clamped into 1..len(nums).

    Raises ValueError when ``window`` is zero or negative.
    """
    if window <= 0:
        raise ValueError("window must be positive")

    span = clamp(window, 1, len(nums))
    # BUG: end index is off-by-one; misses final valid window.
    return [
        sum(nums[start : start + span]) / span
        for start in range(len(nums) - span)
    ]
|
rl_code_fix_env/dataset/problem_18/math_utils.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def clamp(value: int, low: int, high: int) -> int:
    """Limit ``value`` to [low, high]; the lower bound is tested first."""
    return low if value < low else high if value > high else value
|
rl_code_fix_env/dataset/problem_18/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "medium",
|
| 3 |
+
"bug_type": "off-by-one",
|
| 4 |
+
"expected_steps": 1
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_18/test.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from src.dataset.problem_18.buggy import moving_average
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class TestMovingAverage(unittest.TestCase):
    """Checks moving_average for a regular window and for a window longer than the data."""

    def test_window_3(self):
        # Five values with window 3 give three averages.
        self.assertEqual(moving_average([1, 2, 3, 4, 5], 3), [2.0, 3.0, 4.0])

    def test_window_larger_than_data(self):
        # Window 5 over two values is expected to average the whole list once.
        self.assertEqual(moving_average([2, 4], 5), [3.0])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
if __name__ == "__main__":
|
| 14 |
+
unittest.main()
|
rl_code_fix_env/dataset/problem_19/buggy.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def calculate_employee_bonus(employees: list[dict], metrics: dict) -> list[dict]:
    """
    Work out a bonus per employee from base salary, performance rating,
    and company-wide metrics.

    employees: list of dicts with 'id', 'role', 'base_salary', 'rating' (1-5)
    metrics: dict with 'company_multiplier' and 'department_multipliers'

    Returns a list of dicts with 'id' and 'bonus'.
    """
    processed = []

    for record in employees:
        # BUG 1: Division by zero risk if rating is 0 or missing, and type mismatch if salary is string
        salary = record.get('base_salary', 0)
        score = record.get('rating', 1)

        # BUG 2: Incorrect logic for role based multiplier, using assignment instead of lookup
        dept_factor = metrics.get('department_multipliers', {})[record.get('role')]  # will raise KeyError if role not found

        # Rating tiers: above 3 earns 10%, exactly 3 earns 5%, otherwise nothing
        if score > 3:
            earned = salary * 0.1
        else:
            earned = salary * 0.05 if score == 3 else 0

        # BUG 3: Does not apply company multiplier correctly to the total
        payout = earned * dept_factor + metrics.get('company_multiplier', 1)

        # BUG 4: mutating original dict instead of creating new one
        record['bonus'] = payout
        processed.append(record)

    return processed
|
rl_code_fix_env/dataset/problem_19/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"difficulty": "hard",
|
| 3 |
+
"bug_type": "multiple",
|
| 4 |
+
"expected_steps": 4
|
| 5 |
+
}
|
rl_code_fix_env/dataset/problem_19/test.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.dataset.problem_19.buggy import calculate_employee_bonus
|
| 3 |
+
|
| 4 |
+
def test_calculate_employee_bonus():
    """End-to-end check of calculate_employee_bonus on four employees.

    Covers a numeric salary, a string salary, a rating below 3, and a role
    that is absent from 'department_multipliers'. Also asserts that the input
    dicts are left untouched and that every result has 'id' and 'bonus' but
    no 'role'.
    """
    employees = [
        {'id': 1, 'role': 'engineering', 'base_salary': 100000, 'rating': 4},
        {'id': 2, 'role': 'sales', 'base_salary': '80000', 'rating': 3},
        {'id': 3, 'role': 'hr', 'base_salary': 60000, 'rating': 2},
        {'id': 4, 'role': 'unknown', 'base_salary': 50000, 'rating': 5}
    ]

    metrics = {
        'company_multiplier': 1.2,
        'department_multipliers': {
            'engineering': 1.5,
            'sales': 1.2,
            'hr': 1.0
        }
    }

    # Original dicts should not be modified
    # (shallow per-employee copies taken before the call serve as the baseline)
    orig_employees = [dict(e) for e in employees]

    results = calculate_employee_bonus(employees, metrics)

    # Check if original was modified
    assert employees == orig_employees, "Original list was mutated"

    # Check results format
    assert len(results) == 4
    for r in results:
        assert 'id' in r
        assert 'bonus' in r
        assert 'role' not in r # Should only contain id and bonus

    # Check values
    # Emp 1: 100000 * 0.1 * 1.5 * 1.2 = 18000
    assert results[0]['bonus'] == 18000

    # Emp 2: 80000 * 0.05 * 1.2 * 1.2 = 5760 (string salary handling)
    assert results[1]['bonus'] == 5760

    # Emp 3: 0 bonus due to rating 2
    assert results[2]['bonus'] == 0

    # Emp 4: unknown role falls back to 1.0 multiplier
    # 50000 * 0.1 * 1.0 * 1.2 = 6000
    assert results[3]['bonus'] == 6000
|
rl_code_fix_env/dataset/problem_2/buggy.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def is_palindrome(text: str) -> bool:
    """Report whether ``text`` reads the same forwards and backwards."""
    # BUG: does not normalize case or skip non-alphanumeric chars.
    trimmed = text.strip()
    mirrored = "".join(reversed(trimmed))
    return trimmed == mirrored
|