Viraaj Sawant commited on
Commit
8a4b89f
·
0 Parent(s):

Initial push of Mini RL Env

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +8 -0
  2. prompts.py +37 -0
  3. requirements.txt +26 -0
  4. rl_code_fix_env/.dockerignore +45 -0
  5. rl_code_fix_env/.gitignore +8 -0
  6. rl_code_fix_env/README.md +255 -0
  7. rl_code_fix_env/__init__.py +14 -0
  8. rl_code_fix_env/client.py +185 -0
  9. rl_code_fix_env/conftest.py +38 -0
  10. rl_code_fix_env/dataset/README.md +20 -0
  11. rl_code_fix_env/dataset/__init__.py +1 -0
  12. rl_code_fix_env/dataset/loader.py +111 -0
  13. rl_code_fix_env/dataset/problem_1/buggy.py +5 -0
  14. rl_code_fix_env/dataset/problem_1/metadata.json +5 -0
  15. rl_code_fix_env/dataset/problem_1/test.py +14 -0
  16. rl_code_fix_env/dataset/problem_10/buggy.py +8 -0
  17. rl_code_fix_env/dataset/problem_10/helpers.py +2 -0
  18. rl_code_fix_env/dataset/problem_10/metadata.json +5 -0
  19. rl_code_fix_env/dataset/problem_10/test.py +12 -0
  20. rl_code_fix_env/dataset/problem_11/buggy.py +14 -0
  21. rl_code_fix_env/dataset/problem_11/metadata.json +5 -0
  22. rl_code_fix_env/dataset/problem_11/test.py +17 -0
  23. rl_code_fix_env/dataset/problem_12/buggy.py +11 -0
  24. rl_code_fix_env/dataset/problem_12/metadata.json +5 -0
  25. rl_code_fix_env/dataset/problem_12/test.py +14 -0
  26. rl_code_fix_env/dataset/problem_13/buggy.py +10 -0
  27. rl_code_fix_env/dataset/problem_13/cache.py +20 -0
  28. rl_code_fix_env/dataset/problem_13/metadata.json +5 -0
  29. rl_code_fix_env/dataset/problem_13/test.py +13 -0
  30. rl_code_fix_env/dataset/problem_14/buggy.py +6 -0
  31. rl_code_fix_env/dataset/problem_14/metadata.json +5 -0
  32. rl_code_fix_env/dataset/problem_14/test.py +15 -0
  33. rl_code_fix_env/dataset/problem_15/buggy.py +4 -0
  34. rl_code_fix_env/dataset/problem_15/metadata.json +5 -0
  35. rl_code_fix_env/dataset/problem_15/test.py +14 -0
  36. rl_code_fix_env/dataset/problem_16/buggy.py +10 -0
  37. rl_code_fix_env/dataset/problem_16/helpers.py +3 -0
  38. rl_code_fix_env/dataset/problem_16/metadata.json +5 -0
  39. rl_code_fix_env/dataset/problem_16/test.py +12 -0
  40. rl_code_fix_env/dataset/problem_17/buggy.py +11 -0
  41. rl_code_fix_env/dataset/problem_17/metadata.json +5 -0
  42. rl_code_fix_env/dataset/problem_17/test.py +11 -0
  43. rl_code_fix_env/dataset/problem_18/buggy.py +14 -0
  44. rl_code_fix_env/dataset/problem_18/math_utils.py +6 -0
  45. rl_code_fix_env/dataset/problem_18/metadata.json +5 -0
  46. rl_code_fix_env/dataset/problem_18/test.py +14 -0
  47. rl_code_fix_env/dataset/problem_19/buggy.py +36 -0
  48. rl_code_fix_env/dataset/problem_19/metadata.json +5 -0
  49. rl_code_fix_env/dataset/problem_19/test.py +48 -0
  50. rl_code_fix_env/dataset/problem_2/buggy.py +5 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.pdf
2
+ venv/
3
+ .venv/
4
+ __pycache__/
5
+ .env
6
+ commands.md
7
+ logs.md
8
+ inference&docker.md
prompts.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Prompt templates for the LLM-based reward scorer.
#
# LLM_SCORER_PROMPT is the system prompt; USER_TEMPLATE is filled with the
# original buggy code and the agent's patched code and sent as the user
# message. The scorer must answer with the JSON schema described below.

LLM_SCORER_PROMPT = """
You are a reward model for an autonomous code bug-fixing agent trained with reinforcement learning.
Your scores are used directly as a learning signal — be precise, consistent, and strict.

You will receive:
- ORIGINAL: the buggy code before the agent's fix
- PATCHED: the code after the agent applied its patch

Evaluate the agent's fix on exactly three axes, each scored 0.0–10.0:

1. CORRECTNESS — Does the patch fix the bug(s) without introducing new ones?
Full marks only if the fix is semantically correct and complete.
Penalise partial fixes, over-patches, or fixes that mask rather than resolve the root cause.

2. MINIMALITY — Is the diff minimal? Penalise unnecessary refactors, renames, whitespace-only changes,
or reformatting of lines unrelated to the bug.

3. QUALITY — Is the patched code readable and idiomatic? Penalise: broken naming conventions,
added dead code, removed necessary comments, or degraded clarity vs. the original.

Respond ONLY with this JSON — no preamble, no trailing text:
{
"correctness": <float 0.0-10.0>,
"minimality": <float 0.0-10.0>,
"quality": <float 0.0-10.0>,
"reasoning": "<one concise sentence per axis, pipe-separated>"
}
"""


# BUG FIX: the system prompt promises the scorer both ORIGINAL and PATCHED,
# but the original template only interpolated {original_code}. Without the
# patched code the reward model can never see what the agent changed, so
# every score would be meaningless. A PATCHED section is added here.
USER_TEMPLATE = """
ORIGINAL:
```python
{original_code}
```

PATCHED:
```python
{patched_code}
```
Return only the JSON.
"""
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ datasets
3
+ gymnasium
4
+ rich
5
+ tqdm
6
+ matplotlib
7
+ seaborn
8
+
9
+ pandas
10
+ numpy
11
+ openenv-core
12
+ fastapi
13
+ requests
14
+ uvicorn
15
+ pydantic
16
+ streamlit
17
+
18
+ groq
19
+ langchain
20
+ langchain-core
21
+ huggingface_hub
22
+
23
+ loguru
24
+ pytest
25
+ unidiff
26
+ diff-match-patch
rl_code_fix_env/.dockerignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual environments (Windows/Linux/Mac)
2
+ .venv/
3
+ venv/
4
+ env/
5
+ ENV/
6
+ .env
7
+
8
+ # Python cache & compiled
9
+ __pycache__/
10
+ *.pyc
11
+ *.pyo
12
+ *.pyd
13
+ .Python
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+ *.egg
18
+ venv
19
+ .venv
20
+
21
+ # Testing & coverage
22
+ .pytest_cache/
23
+ .coverage
24
+ htmlcov/
25
+
26
+ # IDE & editor
27
+ .vscode/
28
+ .idea/
29
+ *.swp
30
+ *.swo
31
+ *~
32
+ .DS_Store
33
+
34
+ # Version control
35
+ .git/
36
+ .gitignore
37
+
38
+ # Build/cache
39
+ .mypy_cache/
40
+ *.log
41
+
42
+ # Docker
43
+ Dockerfile
44
+ .dockerignore
45
+ docker-compose.yml
rl_code_fix_env/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.pdf
2
+ venv/
3
+ .venv/
4
+ __pycache__/
5
+ .env
6
+ *.pyc
7
+ *.egg
8
+ pytest-cache-files-*/
rl_code_fix_env/README.md ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Rl Code Fix Env Environment Server
3
+ emoji:
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ ---
13
+
14
+ # Rl Code Fix Env Environment
15
+
16
+ A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
17
+
18
+ ## Quick Start
19
+
20
+ The simplest way to use the Rl Code Fix Env environment is through the `RlCodeFixEnv` class:
21
+
22
+ ```python
23
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
24
+
25
+ try:
26
+ # Create environment from Docker image
27
+ rl_code_fix_envenv = RlCodeFixEnv.from_docker_image("rl_code_fix_env-env:latest")
28
+
29
+ # Reset
30
+ result = rl_code_fix_envenv.reset()
31
+ print(f"Reset: {result.observation.echoed_message}")
32
+
33
+ # Send multiple messages
34
+ messages = ["Hello, World!", "Testing echo", "Final message"]
35
+
36
+ for msg in messages:
37
+ result = rl_code_fix_envenv.step(RlCodeFixAction(message=msg))
38
+ print(f"Sent: '{msg}'")
39
+ print(f" Echoed: '{result.observation.echoed_message}'")
40
+ print(f" Length: {result.observation.message_length}")
41
+ print(f" Reward: {result.reward}")
42
+
43
+ finally:
44
+ # Always clean up
45
+ rl_code_fix_envenv.close()
46
+ ```
47
+
48
+ That's it! The `RlCodeFixEnv.from_docker_image()` method handles:
49
+ - Starting the Docker container
50
+ - Waiting for the server to be ready
51
+ - Connecting to the environment
52
+ - Container cleanup when you call `close()`
53
+
54
+ ## Building the Docker Image
55
+
56
+ Before using the environment, you need to build the Docker image:
57
+
58
+ ```bash
59
+ # From project root
60
+ docker build -t rl_code_fix_env-env:latest -f server/Dockerfile .
61
+ ```
62
+
63
+ ## Deploying to Hugging Face Spaces
64
+
65
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
66
+
67
+ ```bash
68
+ # From the environment directory (where openenv.yaml is located)
69
+ openenv push
70
+
71
+ # Or specify options
72
+ openenv push --namespace my-org --private
73
+ ```
74
+
75
+ The `openenv push` command will:
76
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
77
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
78
+ 3. Upload to Hugging Face (ensuring you're logged in)
79
+
80
+ ### Prerequisites
81
+
82
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
83
+
84
+ ### Options
85
+
86
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
87
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
88
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
89
+ - `--private`: Deploy the space as private (default: public)
90
+
91
+ ### Examples
92
+
93
+ ```bash
94
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
95
+ openenv push
96
+
97
+ # Push to a specific repository
98
+ openenv push --repo-id my-org/my-env
99
+
100
+ # Push with a custom base image
101
+ openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
102
+
103
+ # Push as a private space
104
+ openenv push --private
105
+
106
+ # Combine options
107
+ openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
108
+ ```
109
+
110
+ After deployment, your space will be available at:
111
+ `https://huggingface.co/spaces/<repo-id>`
112
+
113
+ The deployed space includes:
114
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
115
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
116
+ - **Health Check** at `/health` - Container health monitoring
117
+ - **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
118
+
119
+ ## Environment Details
120
+
121
+ ### Action
122
+ **RlCodeFixAction**: Contains a single field
123
+ - `message` (str) - The message to echo back
124
+
125
+ ### Observation
126
+ **RlCodeFixObservation**: Contains the echo response and metadata
127
+ - `echoed_message` (str) - The message echoed back
128
+ - `message_length` (int) - Length of the message
129
+ - `reward` (float) - Reward based on message length (`message_length × 0.1`)
130
+ - `done` (bool) - Always False for echo environment
131
+ - `metadata` (dict) - Additional info like step count
132
+
133
+ ### Reward
134
+ The reward is calculated as: `message_length × 0.1`
135
+ - "Hi" reward: 0.2
136
+ - "Hello, World!" reward: 1.3
137
+ - Empty message reward: 0.0
138
+
139
+ ## Advanced Usage
140
+
141
+ ### Connecting to an Existing Server
142
+
143
+ If you already have a Rl Code Fix Env environment server running, you can connect directly:
144
+
145
+ ```python
146
+ from rl_code_fix_env import RlCodeFixEnv
147
+
148
+ # Connect to existing server
149
+ rl_code_fix_envenv = RlCodeFixEnv(base_url="<ENV_HTTP_URL_HERE>")
150
+
151
+ # Use as normal
152
+ result = rl_code_fix_envenv.reset()
153
+ result = rl_code_fix_envenv.step(RlCodeFixAction(message="Hello!"))
154
+ ```
155
+
156
+ Note: When connecting to an existing server, `rl_code_fix_envenv.close()` will NOT stop the server.
157
+
158
+ ### Using the Context Manager
159
+
160
+ The client supports context manager usage for automatic connection management:
161
+
162
+ ```python
163
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
164
+
165
+ # Connect with context manager (auto-connects and closes)
166
+ with RlCodeFixEnv(base_url="http://localhost:8000") as env:
167
+ result = env.reset()
168
+ print(f"Reset: {result.observation.echoed_message}")
169
+ # Multiple steps with low latency
170
+ for msg in ["Hello", "World", "!"]:
171
+ result = env.step(RlCodeFixAction(message=msg))
172
+ print(f"Echoed: {result.observation.echoed_message}")
173
+ ```
174
+
175
+ The client uses WebSocket connections for:
176
+ - **Lower latency**: No HTTP connection overhead per request
177
+ - **Persistent session**: Server maintains your environment state
178
+ - **Efficient for episodes**: Better for many sequential steps
179
+
180
+ ### Concurrent WebSocket Sessions
181
+
182
+ The server supports multiple concurrent WebSocket connections. To enable this,
183
+ modify `server/app.py` to use factory mode:
184
+
185
+ ```python
186
+ # In server/app.py - use factory mode for concurrent sessions
187
+ app = create_app(
188
+ RlCodeFixEnvironment, # Pass class, not instance
189
+ RlCodeFixAction,
190
+ RlCodeFixObservation,
191
+ max_concurrent_envs=4, # Allow 4 concurrent sessions
192
+ )
193
+ ```
194
+
195
+ Then multiple clients can connect simultaneously:
196
+
197
+ ```python
198
+ from rl_code_fix_env import RlCodeFixAction, RlCodeFixEnv
199
+ from concurrent.futures import ThreadPoolExecutor
200
+
201
+ def run_episode(client_id: int):
202
+ with RlCodeFixEnv(base_url="http://localhost:8000") as env:
203
+ result = env.reset()
204
+ for i in range(10):
205
+ result = env.step(RlCodeFixAction(message=f"Client {client_id}, step {i}"))
206
+ return client_id, result.observation.message_length
207
+
208
+ # Run 4 episodes concurrently
209
+ with ThreadPoolExecutor(max_workers=4) as executor:
210
+ results = list(executor.map(run_episode, range(4)))
211
+ ```
212
+
213
+ ## Development & Testing
214
+
215
+ ### Direct Environment Testing
216
+
217
+ Test the environment logic directly without starting the HTTP server:
218
+
219
+ ```bash
220
+ # From the server directory
221
+ python3 server/rl_code_fix_env_environment.py
222
+ ```
223
+
224
+ This verifies that:
225
+ - Environment resets correctly
226
+ - Step executes actions properly
227
+ - State tracking works
228
+ - Rewards are calculated correctly
229
+
230
+ ### Running Locally
231
+
232
+ Run the server locally for development:
233
+
234
+ ```bash
235
+ uvicorn server.app:app --reload
236
+ ```
237
+
238
+ ## Project Structure
239
+
240
+ ```
241
+ rl_code_fix_env/
242
+ .dockerignore # Docker build exclusions
243
+ __init__.py # Module exports
244
+ README.md # This file
245
+ openenv.yaml # OpenEnv manifest
246
+ pyproject.toml # Project metadata and dependencies
247
+ uv.lock # Locked dependencies (generated)
248
+ client.py # RlCodeFixEnv client
249
+ models.py # Action and Observation models
250
+ server/
251
+ __init__.py # Server module exports
252
+ rl_code_fix_env_environment.py # Core environment logic
253
+ app.py # FastAPI application (HTTP + WebSocket endpoints)
254
+ Dockerfile # Container image definition
255
+ ```
rl_code_fix_env/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Rl Code Fix Env Environment."""
8
+
9
+ from .models import CodeFixerAction, CodeFixerObservation
10
+
11
+ __all__ = [
12
+ "CodeFixerAction",
13
+ "CodeFixerObservation",
14
+ ]
rl_code_fix_env/client.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Code Fixer Environment Client."""

import asyncio
import inspect
import logging
from typing import Dict

from openenv.core import EnvClient
from openenv.core.client_types import StepResult
from openenv.core.env_server.types import State

from rl_code_fix_env.models import CodeFixerAction, CodeFixerObservation

log = logging.getLogger(__name__)

class CodeFixerEnv(
    EnvClient[CodeFixerAction, CodeFixerObservation, State]
):
    """
    Client for the Code Fixer Environment.

    This client maintains a persistent WebSocket connection to the environment server,
    enabling efficient multi-step interactions with lower latency.
    Each client instance has its own dedicated environment session on the server.

    It wraps the (possibly asynchronous) base EnvClient in a synchronous API
    by driving coroutines on a private event loop, and transparently
    reconnects once when the WebSocket dies after an idle period.

    Example:
        >>> # Connect to a running server
        >>> with CodeFixerEnv(base_url="http://localhost:8000") as client:
        ...     result = client.reset()
        ...     print(result.observation.code)
        ...
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ...     print(result.observation.test_passed)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = CodeFixerEnv.from_docker_image("code_fixer-env:latest")
        >>> try:
        ...     result = client.reset()
        ...     result = client.step(CodeFixerAction(type="run_tests"))
        ... finally:
        ...     client.close()
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Private event loop: base-class methods may return coroutines, and
        # _run_sync() drives them here so callers get a plain blocking API.
        self._loop = asyncio.new_event_loop()
        # Store init args for reconnection (replayed in _reconnect()).
        self._init_args = args
        self._init_kwargs = kwargs

    def _run_sync(self, result):
        """Run coroutine results on this client's dedicated event loop."""
        # The base class may be sync or async depending on its version; only
        # spin the loop when we actually received a coroutine.
        if inspect.iscoroutine(result):
            return self._loop.run_until_complete(result)
        return result

    def _reconnect(self) -> None:
        """
        Tear down the dead event loop and WebSocket connection, then
        re-initialise so the next call works cleanly.

        Called automatically by reset() and step() when a 1011 / timeout
        error is detected after an idle period.
        """
        log.warning("[CodeFixerEnv] WebSocket timed out reconnecting...")
        # Close the old loop gracefully; the connection may already be dead,
        # so any error here is expected and deliberately swallowed.
        try:
            self._run_sync(super().close())
        except Exception:
            pass
        if not self._loop.is_closed():
            self._loop.close()

        # Re-initialise: fresh loop + fresh base-class state.
        # NOTE(review): super().__init__ here also re-runs our own attribute
        # setup only via the base class — _init_args/_init_kwargs survive
        # because they are replayed as the constructor arguments.
        self._loop = asyncio.new_event_loop()
        super().__init__(*self._init_args, **self._init_kwargs)
        log.warning("[CodeFixerEnv] Reconnected successfully.")

    @staticmethod
    def _is_reconnectable_ws_error(exc: Exception) -> bool:
        # Heuristic, substring-based classification of "the socket died"
        # errors (close codes 1011/1006, keepalive timeouts, closed frames).
        # Broad markers like "closed" may over-match; acceptable here since
        # the worst case is one extra reconnect attempt.
        err = str(exc).lower()
        reconnect_markers = (
            "1011",
            "1006",
            "keepalive",
            "timed out",
            "closed",
            "close frame",
            "connection closed",
            "connectionclosed",
            "websocket",
        )
        return any(marker in err for marker in reconnect_markers)

    def reset(self):
        """Reset the environment auto-reconnects if the WebSocket died."""
        try:
            return self._run_sync(super().reset())
        except Exception as exc:
            if self._is_reconnectable_ws_error(exc):
                self._reconnect()
                return self._run_sync(super().reset())  # one retry
            raise

    def step(self, action: CodeFixerAction):
        """Execute a step auto-reconnects if the WebSocket died."""
        try:
            return self._run_sync(super().step(action))
        except Exception as exc:
            if self._is_reconnectable_ws_error(exc):
                self._reconnect()
                return self._run_sync(super().step(action))  # one retry
            raise

    def close(self):
        """Close client resources and the dedicated event loop safely."""
        try:
            self._run_sync(super().close())
        finally:
            # Always release the loop, even if the server-side close failed.
            if not self._loop.is_closed():
                self._loop.close()

    def _step_payload(self, action: CodeFixerAction) -> Dict:
        """
        Convert CodeFixerAction to JSON payload for step message.

        Args:
            action: CodeFixerAction instance

        Returns:
            Dictionary representation suitable for JSON encoding
        """
        return {
            "type": action.type,
            "payload": action.payload,
        }

    def _parse_result(self, payload: Dict) -> StepResult[CodeFixerObservation]:
        """
        Parse server response into StepResult[CodeFixerObservation].

        Args:
            payload: JSON response data from server

        Returns:
            StepResult with CodeFixerObservation
        """
        obs_data = payload.get("observation", {})
        # done/reward fall back to the top-level payload when the server
        # does not embed them inside the observation dict.
        observation = CodeFixerObservation(
            code=obs_data.get("code", ""),
            logs=obs_data.get("logs"),
            test_score=float(obs_data.get("test_score", 0.0)),
            total_tests=obs_data.get("total_tests", 1),
            steps=obs_data.get("steps", 0),
            done=obs_data.get("done", payload.get("done", False)),
            reward=obs_data.get("reward", payload.get("reward")),
        )

        return StepResult(
            observation=observation,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict) -> State:
        """
        Parse server response into State object.

        Args:
            payload: JSON response from state request

        Returns:
            State object with episode_id and step_count
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
rl_code_fix_env/conftest.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
conftest.py — repo-root pytest configuration.

Registers `src.dataset` as a sys.modules alias for `dataset` so that all
problem test files using `from src.dataset.problem_X.buggy import ...`
resolve correctly without needing to rename 24 test files.

The physical layout is:
    <repo_root>/dataset/problem_X/buggy.py   <- the real files
    <repo_root>/src/                         <- has environment/, reward/, etc.,
                                                but NO dataset/ subfolder

With PYTHONPATH=<repo_root>:
    `import dataset.problem_1.buggy` works natively;
    `import src.dataset.problem_1.buggy` would fail — fixed here via alias.
"""

import sys
import importlib
from pathlib import Path

# Make the repo root importable regardless of where pytest is invoked from.
_REPO_ROOT = str(Path(__file__).parent)
if _REPO_ROOT not in sys.path:
    sys.path.insert(0, _REPO_ROOT)

import dataset as _real_dataset

# Alias the package itself: any lookup of "src.dataset" now resolves to the
# real top-level "dataset" package. setdefault keeps an existing entry if
# something else registered one first.
sys.modules.setdefault("src.dataset", _real_dataset)

# Pre-register every problem_X subpackage under the "src.dataset." alias so
# that submodule imports (e.g. src.dataset.problem_1.buggy) resolve through
# the already-imported real packages.
import pkgutil
for _pkg in pkgutil.iter_modules(_real_dataset.__path__):
    _full = f"dataset.{_pkg.name}"
    _alias = f"src.dataset.{_pkg.name}"
    try:
        _mod = importlib.import_module(_full)
        sys.modules.setdefault(_alias, _mod)
    except Exception:
        # Best-effort: a broken problem package shouldn't take down the
        # whole test session at collection time.
        pass
rl_code_fix_env/dataset/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Buggy Training Dataset
2
+
3
+ This dataset is organized as:
4
+
5
+ - `problem_x/buggy.py`: intentionally buggy implementation
6
+ - `problem_x/test.py`: correctness tests that should fail before fixes
7
+ - optional extra modules (`helpers.py`, `cache.py`, etc.) to support multi-file bug fixing
8
+
9
+ Current problems: `problem_1` to `problem_19`.
10
+
11
+ Bug patterns included:
12
+ - off-by-one errors
13
+ - boundary condition mistakes
14
+ - incorrect sorting direction
15
+ - exception handling mistakes
16
+ - state/recency bugs in cache logic
17
+ - recursive base-case bugs
18
+ - parsing and whitespace normalization issues
19
+ - order-preservation regressions
20
+ - matrix transformation direction errors
rl_code_fix_env/dataset/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Dataset loading modules."""
rl_code_fix_env/dataset/loader.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load static, competition-approved tasks."""

import os
import json
from pathlib import Path
from typing import Any, Dict, List, Optional

# Get the dataset root (same folder as this file); problem_* folders live here.
DATASET_ROOT = Path(__file__).parent

# Hardcoded competition tasks: Easy / Medium / Hard.
STATIC_TASKS = {
    "easy": {
        "problem_id": "problem_1",
        "difficulty": "easy",
        "description": "String reversal with space normalization",
    },
    "medium": {
        "problem_id": "problem_10",
        "difficulty": "medium",
        "description": "Matrix 90 clockwise rotation",
    },
    "hard": {
        "problem_id": "problem_13",
        "difficulty": "hard",
        "description": "LRU cache with correct eviction policy",
    },
}


def load_problem(problem_id: str) -> Dict[str, Any]:
    """
    Load a single problem from disk.

    Args:
        problem_id: e.g., "problem_1", "problem_10", "problem_13"

    Returns:
        {
            "code": str,         # buggy.py content
            "tests": str,        # absolute path to the problem's test.py
            "metadata": dict,    # parsed metadata.json
            "problem_dir": str,  # absolute path to problem folder
            "problem_id": str,   # echoed back for convenience
        }

    Raises:
        FileNotFoundError: if the problem directory (or one of its files)
            does not exist.
    """
    # FIX: annotation was Dict[str, any] — lowercase `any` is the builtin
    # function, not a type; typing.Any is the correct annotation.
    problem_dir = DATASET_ROOT / problem_id

    if not problem_dir.exists():
        raise FileNotFoundError(f"Problem directory not found: {problem_dir}")

    # Load buggy code.
    buggy_file = problem_dir / "buggy.py"
    code = buggy_file.read_text(encoding="utf-8")

    # Load metadata.
    metadata_file = problem_dir / "metadata.json"
    metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

    # Test file path. NOTE: this is absolute whenever DATASET_ROOT is
    # (the previous comment incorrectly called it relative).
    test_path = str(problem_dir / "test.py")

    return {
        "code": code,
        "tests": test_path,
        "metadata": metadata,
        "problem_dir": str(problem_dir),
        "problem_id": problem_id,
    }


def get_hardcoded_task(difficulty: str) -> Dict[str, Any]:
    """
    Get one of the three static competition tasks.

    Args:
        difficulty: "easy" | "medium" | "hard"

    Returns:
        Task dict with code, tests, metadata (see load_problem).

    Raises:
        ValueError: if difficulty is not one of the three approved values
    """
    if difficulty not in STATIC_TASKS:
        raise ValueError(
            f"Invalid difficulty '{difficulty}'. "
            f"Must be one of: {list(STATIC_TASKS.keys())}"
        )

    task_info = STATIC_TASKS[difficulty]
    problem_id = task_info["problem_id"]

    return load_problem(problem_id)


def get_random_tasks():
    """
    DEPRECATED: Use get_hardcoded_task() instead.
    Kept for backward compatibility.
    """
    import warnings
    warnings.warn(
        "get_random_tasks() is deprecated. Use get_hardcoded_task('easy'|'medium'|'hard')",
        DeprecationWarning,
        stacklevel=2
    )
    # Return a default (easy) task.
    return get_hardcoded_task("easy")
rl_code_fix_env/dataset/problem_1/buggy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
def reverse_words(text: str) -> str:
    """Return the words of *text* in reverse order, space-joined."""
    # BUG (intentional dataset fixture): split(" ") — unlike split() —
    # keeps empty items for repeated spaces, so runs of spaces survive
    # the round trip instead of being normalized.
    return " ".join(text.split(" ")[::-1])
rl_code_fix_env/dataset/problem_1/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "string-splitting",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_1/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_1.buggy import reverse_words


class TestReverseWords(unittest.TestCase):
    """Tests for problem_1: word-order reversal with space normalization."""

    # Single spaces produce no empty tokens, so this passes even with the
    # planted split(" ") bug.
    def test_simple(self):
        self.assertEqual(reverse_words("hello world"), "world hello")

    # NOTE(review): the method name suggests repeated spaces, but the literal
    # here shows single spaces — possibly collapsed when this page was
    # rendered; confirm against the raw file.
    def test_multiple_spaces(self):
        self.assertEqual(reverse_words("one two three"), "three two one")


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_10/buggy.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from src.dataset.problem_10.helpers import transpose


def rotate_90_clockwise(matrix: list[list[int]]) -> list[list[int]]:
    """Rotate matrix 90 degrees clockwise."""
    t = transpose(matrix)
    # BUG: this is counter-clockwise.
    # (Intentional dataset fixture: transpose followed by reversing the ROW
    # ORDER is the 90° counter-clockwise rotation; clockwise would instead
    # reverse the elements within each transposed row.)
    return t[::-1]
rl_code_fix_env/dataset/problem_10/helpers.py ADDED
@@ -0,0 +1,2 @@
 
 
 
def transpose(matrix: list[list[int]]) -> list[list[int]]:
    """Return the transpose of *matrix* (rows become columns)."""
    return list(map(list, zip(*matrix)))
rl_code_fix_env/dataset/problem_10/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "matrix-transformation",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_10/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_10.buggy import rotate_90_clockwise


class TestRotateMatrix(unittest.TestCase):
    """Tests for problem_10: 90-degree clockwise matrix rotation."""

    # Clockwise rotation of [[1, 2], [3, 4]] is [[3, 1], [4, 2]]; the buggy
    # transpose-then-reverse-rows implementation yields the CCW result, so
    # this fails until the agent fixes it.
    def test_2x2(self):
        matrix = [[1, 2], [3, 4]]
        self.assertEqual(rotate_90_clockwise(matrix), [[3, 1], [4, 2]])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_11/buggy.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def binary_search(nums: list[int], target: int) -> int:
    """Return index of target, or -1 if not found.

    Assumes *nums* is sorted ascending.
    """
    left, right = 0, len(nums) - 1

    # NOTE(review): `left < right` never tests the last remaining candidate
    # (when the window narrows to left == right), so e.g. a target at the
    # final index is missed. metadata.json labels this problem
    # "boundary-condition", so this appears to be the planted bug —
    # leave it in place; it is the fixture the agent must repair.
    while left < right:
        mid = (left + right) // 2
        if nums[mid] == target:
            return mid
        if nums[mid] < target:
            left = mid + 1
        else:
            right = mid - 1

    return -1
rl_code_fix_env/dataset/problem_11/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "boundary-condition",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_11/test.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_11.buggy import binary_search


class TestBinarySearch(unittest.TestCase):
    """Tests for problem_11: binary search boundary conditions."""

    def test_found_middle(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 5), 2)

    # Exercises the planted `left < right` bug: the last index is only
    # reachable when the final one-element window is examined.
    def test_found_last(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 7), 3)

    def test_not_found(self):
        self.assertEqual(binary_search([1, 3, 5, 7], 4), -1)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_12/buggy.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
def parse_pairs(raw: str) -> dict[str, int]:
    """Parse strings like 'a=1,b=2' into a dict.

    Returns an empty dict for empty input. Propagates ValueError when a
    segment does not contain exactly one '=' or the value is not an
    integer literal.
    """
    result = {}
    if not raw:
        return result

    for segment in raw.split(","):
        key, value = segment.split("=")
        # BUG: does not strip whitespace around keys/values.
        # (int() tolerates surrounding spaces in the value, but keys keep
        # theirs, so 'x = 10' produces the key 'x ' rather than 'x'.)
        result[key] = int(value)
    return result
rl_code_fix_env/dataset/problem_12/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "string-normalization",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_12/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import unittest
from src.dataset.problem_12.buggy import parse_pairs


class TestParsePairs(unittest.TestCase):
    """Tests for problem_12: key=value parsing with whitespace tolerance."""

    def test_simple(self):
        self.assertEqual(parse_pairs("a=1,b=2"), {"a": 1, "b": 2})

    # Fails with the planted bug: keys retain surrounding spaces ('x ', ' y').
    def test_spaces(self):
        self.assertEqual(parse_pairs("x = 10, y = 20"), {"x": 10, "y": 20})


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_13/buggy.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_13.cache import LRUCache
2
+
3
+
4
def run_ops() -> tuple[int, int]:
    """Exercise a capacity-2 LRUCache and report two final lookups.

    Touches "a" via get() before inserting "c", so with correct recency
    tracking "b" is the eviction victim.
    """
    lru = LRUCache(2)
    for key, value in (("a", 1), ("b", 2)):
        lru.put(key, value)
    lru.get("a")  # read "a" so it becomes the most recently used entry
    lru.put("c", 3)  # capacity exceeded: least-recently-used entry is evicted
    return lru.get("a"), lru.get("b")
rl_code_fix_env/dataset/problem_13/cache.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+
3
+
4
class LRUCache:
    """Least-recently-used cache backed by an OrderedDict.

    The OrderedDict's insertion order doubles as the recency order: the
    first entry is the least recently used and is evicted on overflow.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity  # maximum number of entries retained
        self.store: OrderedDict[str, int] = OrderedDict()

    def get(self, key: str) -> int:
        """Return key's value and refresh its recency, or -1 if absent."""
        if key not in self.store:
            return -1
        # A read counts as a use: move the key to the most-recent end so
        # it is not the next eviction candidate.
        self.store.move_to_end(key)
        return self.store[key]

    def put(self, key: str, value: int) -> None:
        """Insert or update key; evict the least recently used on overflow."""
        if key in self.store:
            self.store.pop(key)  # re-inserting below refreshes recency
        self.store[key] = value
        if len(self.store) > self.capacity:
            # last=False pops from the oldest (least recently used) end.
            self.store.popitem(last=False)
rl_code_fix_env/dataset/problem_13/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "hard",
3
+ "bug_type": "state-logic",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_13/test.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_13.buggy import run_ops


class TestLRU(unittest.TestCase):
    """Spec: get() must refresh recency, so 'b' (not 'a') gets evicted."""

    def test_recency_update_on_get(self):
        a, b = run_ops()
        # "a" was read via get() before "c" was inserted, so it survives...
        self.assertEqual(a, 1)
        # ...and "b", now the least recently used key, was evicted.
        self.assertEqual(b, -1)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_14/buggy.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def fibonacci_recursive(n: int) -> int:
    """Return the nth Fibonacci number (fib(0) = 0, fib(1) = 1).

    Raises:
        ValueError: if n is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    # Base cases: fib(0) = 0 and fib(1) = 1, i.e. the identity on n.
    if n <= 1:
        return n
    return fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
rl_code_fix_env/dataset/problem_14/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "recursion-base-case",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_14/test.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_14.buggy import fibonacci_recursive


class TestFibonacciRecursive(unittest.TestCase):
    """Spec: zero-based Fibonacci, i.e. fib(0) = 0 and fib(1) = 1."""

    def test_base_cases(self):
        self.assertEqual(fibonacci_recursive(0), 0)
        self.assertEqual(fibonacci_recursive(1), 1)

    def test_n5(self):
        # Sequence: 0, 1, 1, 2, 3, 5 -> fib(5) is 5.
        self.assertEqual(fibonacci_recursive(5), 5)


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_15/buggy.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def has_overlap(a: tuple[int, int], b: tuple[int, int]) -> bool:
    """Check if closed intervals [a0, a1] and [b0, b1] overlap.

    Closed intervals that merely touch at an endpoint (e.g. [1, 3] and
    [3, 7] sharing 3) count as overlapping, so both comparisons are
    inclusive.
    """
    return a[0] <= b[1] and b[0] <= a[1]
rl_code_fix_env/dataset/problem_15/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "boundary-condition",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_15/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_15.buggy import has_overlap


class TestIntervalOverlap(unittest.TestCase):
    """Spec for has_overlap on closed intervals."""

    def test_overlapping(self):
        self.assertTrue(has_overlap((1, 5), (4, 9)))

    def test_touching_endpoints(self):
        # Closed intervals sharing a single endpoint still overlap.
        self.assertTrue(has_overlap((1, 3), (3, 7)))


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_16/buggy.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_16.helpers import normalize_scores
2
+
3
+
4
def top_label(scores: dict[str, float]) -> str:
    """Return the label with the highest normalized probability.

    Args:
        scores: non-empty mapping of label to raw (unnormalized) score.

    Returns:
        The label whose normalized probability is largest; on ties, the
        earliest label in the dict's iteration order wins.
    """
    labels = list(scores.keys())
    probs = normalize_scores(list(scores.values()))
    # Select the index of the largest probability (was min(), which
    # returned the least likely label).
    idx = max(range(len(probs)), key=lambda i: probs[i])
    return labels[idx]
rl_code_fix_env/dataset/problem_16/helpers.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def normalize_scores(scores: list[float]) -> list[float]:
    """Rescale scores so they sum to 1; an empty input yields an empty list.

    Raises ZeroDivisionError when a non-empty input sums to zero.
    """
    denominator = sum(scores)
    return [value / denominator for value in scores]
rl_code_fix_env/dataset/problem_16/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "easy",
3
+ "bug_type": "logic-error",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_16/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_16.buggy import top_label


class TestTopLabel(unittest.TestCase):
    """Spec: top_label picks the argmax, not the argmin, of the scores."""

    def test_select_highest(self):
        scores = {"cat": 0.2, "dog": 0.7, "bird": 0.1}
        self.assertEqual(top_label(scores), "dog")


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_17/buggy.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def dedupe_preserve_order(items: list[int]) -> list[int]:
    """Remove duplicates, keeping the first occurrence of each value in order.

    Args:
        items: values in arbitrary order, possibly with repeats.

    Returns:
        A new list with each distinct value exactly once, ordered by its
        first appearance in items.
    """
    seen: set[int] = set()
    out: list[int] = []
    for item in items:
        # Only the first sighting of a value is kept; later repeats are
        # skipped (the buggy version demoted values to their last position).
        if item not in seen:
            seen.add(item)
            out.append(item)
    return out
rl_code_fix_env/dataset/problem_17/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "logic-error",
4
+ "expected_steps": 2
5
+ }
rl_code_fix_env/dataset/problem_17/test.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_17.buggy import dedupe_preserve_order


class TestDedupe(unittest.TestCase):
    """Spec: deduplication keeps the FIRST occurrence of each value."""

    def test_order(self):
        # Repeated 1 and 2 must stay at their first positions.
        self.assertEqual(dedupe_preserve_order([1, 2, 1, 3, 2]), [1, 2, 3])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_18/buggy.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.dataset.problem_18.math_utils import clamp
2
+
3
+
4
def moving_average(nums: list[int], window: int) -> list[float]:
    """Return the simple moving average over each full window of nums.

    The window is capped at len(nums), so a window larger than the data
    yields a single average over the whole list. Empty input yields [].

    Raises:
        ValueError: if window is not positive.
    """
    if window <= 0:
        raise ValueError("window must be positive")
    if not nums:
        return []

    # Cap the window at the data length (equivalent to the old
    # clamp(window, 1, len(nums)) since window >= 1 here).
    window = min(window, len(nums))
    # len(nums) - window + 1 start positions cover every full window,
    # including the final one (the buggy range stopped one short).
    return [
        sum(nums[i : i + window]) / window
        for i in range(len(nums) - window + 1)
    ]
rl_code_fix_env/dataset/problem_18/math_utils.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def clamp(value: int, low: int, high: int) -> int:
    """Confine value to [low, high], checking the lower bound first."""
    if value < low:
        return low
    return high if value > high else value
rl_code_fix_env/dataset/problem_18/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "medium",
3
+ "bug_type": "off-by-one",
4
+ "expected_steps": 1
5
+ }
rl_code_fix_env/dataset/problem_18/test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import unittest
from src.dataset.problem_18.buggy import moving_average


class TestMovingAverage(unittest.TestCase):
    """Spec for moving_average, including the final-window boundary."""

    def test_window_3(self):
        # Three full windows exist; the last one ([3, 4, 5]) must be included.
        self.assertEqual(moving_average([1, 2, 3, 4, 5], 3), [2.0, 3.0, 4.0])

    def test_window_larger_than_data(self):
        # Window is clamped to len(nums), giving one whole-list average.
        self.assertEqual(moving_average([2, 4], 5), [3.0])


if __name__ == "__main__":
    unittest.main()
rl_code_fix_env/dataset/problem_19/buggy.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def calculate_employee_bonus(employees: list[dict], metrics: dict) -> list[dict]:
    """
    Calculate employee bonuses from base salary, performance rating, and
    company-wide metrics.

    employees: list of dicts with 'id', 'role', 'base_salary' (number or
        numeric string), and 'rating' (1-5; missing/None treated as no bonus).
    metrics: dict with 'company_multiplier' (default 1) and
        'department_multipliers' mapping role -> multiplier (unknown roles
        fall back to 1.0).

    Returns a list of NEW dicts containing only 'id' and 'bonus'; the input
    list and its dicts are never mutated.
    """
    company_mult = metrics.get('company_multiplier', 1)
    dept_mults = metrics.get('department_multipliers', {})
    results = []

    for emp in employees:
        # Salaries sometimes arrive as numeric strings; coerce once up front
        # so the rate math below never multiplies a str.
        base = float(emp.get('base_salary') or 0)
        # Missing/None ratings count as 0 (no bonus) instead of crashing on
        # a None comparison.
        rating = emp.get('rating') or 0

        # Unknown roles use a neutral 1.0 multiplier rather than KeyError.
        role_mult = dept_mults.get(emp.get('role'), 1.0)

        # Bonus rate tiers: >3 earns 10%, exactly 3 earns 5%, below earns 0.
        if rating > 3:
            base_bonus = base * 0.1
        elif rating == 3:
            base_bonus = base * 0.05
        else:
            base_bonus = 0

        # Multipliers compound: the company multiplier SCALES the
        # role-adjusted bonus (the bug added it instead).
        total_bonus = base_bonus * role_mult * company_mult

        # Build a fresh dict so callers' employee records stay untouched.
        results.append({'id': emp.get('id'), 'bonus': total_bonus})

    return results
rl_code_fix_env/dataset/problem_19/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "difficulty": "hard",
3
+ "bug_type": "multiple",
4
+ "expected_steps": 4
5
+ }
rl_code_fix_env/dataset/problem_19/test.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from src.dataset.problem_19.buggy import calculate_employee_bonus

def test_calculate_employee_bonus():
    """End-to-end spec: salary coercion, role fallback, multiplier compounding,
    and purity (inputs must not be mutated)."""
    employees = [
        {'id': 1, 'role': 'engineering', 'base_salary': 100000, 'rating': 4},
        {'id': 2, 'role': 'sales', 'base_salary': '80000', 'rating': 3},
        {'id': 3, 'role': 'hr', 'base_salary': 60000, 'rating': 2},
        {'id': 4, 'role': 'unknown', 'base_salary': 50000, 'rating': 5}
    ]

    metrics = {
        'company_multiplier': 1.2,
        'department_multipliers': {
            'engineering': 1.5,
            'sales': 1.2,
            'hr': 1.0
        }
    }

    # Original dicts should not be modified
    orig_employees = [dict(e) for e in employees]

    results = calculate_employee_bonus(employees, metrics)

    # Check if original was modified
    assert employees == orig_employees, "Original list was mutated"

    # Check results format
    assert len(results) == 4
    for r in results:
        assert 'id' in r
        assert 'bonus' in r
        assert 'role' not in r  # Should only contain id and bonus

    # NOTE(review): the exact float equality below relies on these specific
    # values rounding cleanly under IEEE-754; pytest.approx would be more
    # robust if the fixture values ever change.
    # Check values
    # Emp 1: 100000 * 0.1 * 1.5 * 1.2 = 18000
    assert results[0]['bonus'] == 18000

    # Emp 2: 80000 * 0.05 * 1.2 * 1.2 = 5760 (string salary handling)
    assert results[1]['bonus'] == 5760

    # Emp 3: 0 bonus due to rating 2
    assert results[2]['bonus'] == 0

    # Emp 4: unknown role falls back to 1.0 multiplier
    # 50000 * 0.1 * 1.0 * 1.2 = 6000
    assert results[3]['bonus'] == 6000
rl_code_fix_env/dataset/problem_2/buggy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def is_palindrome(text: str) -> bool:
    """Check whether text is a palindrome, ignoring case and punctuation.

    Non-alphanumeric characters (spaces, commas, colons, ...) are dropped
    and letters are case-folded before comparison, so classic phrase
    palindromes like "A man, a plan, a canal: Panama" are recognized.
    An empty (or all-punctuation) string counts as a palindrome.
    """
    cleaned = "".join(ch.casefold() for ch in text if ch.isalnum())
    return cleaned == cleaned[::-1]