Sneha Rudra committed on
Commit
1e3b07a
·
0 Parent(s):

Initial commit: Code Debugging Challenge OpenEnv environment

Browse files
.gitignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+ .env
25
+ .venv
26
+ env/
27
+ venv/
README.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Code Debugging Challenge
3
+ emoji: 🐛
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ tags:
10
+ - openenv
11
+ - reinforcement-learning
12
+ - code-debugging
13
+ - agentic-ai
14
+ ---
15
+
16
+ # 🐛 Code Debugging Challenge - OpenEnv Environment
17
+
18
+ A production-ready OpenEnv environment where AI agents learn to debug Python code.
19
+
20
+ ## 🎯 Overview
21
+
22
+ This environment challenges AI agents to identify and fix bugs in Python code snippets using the official **OpenEnv framework** from Meta-PyTorch and Hugging Face.
23
+
24
+ **Key Features:**
25
+ - ✅ Built with official OpenEnv library
26
+ - ✅ WebSocket-based client-server architecture
27
+ - ✅ Docker containerized for isolation
28
+ - ✅ Compatible with TRL, Torchforge, and other RL frameworks
29
+ - ✅ Production-ready with proper session management
30
+
31
+ ## 🏗️ Environment Details
32
+
33
+ - **Action Space**: 4 discrete actions (analyze, fix, test, submit)
34
+ - **Observation Space**: Structured observations with code, errors, and feedback
35
+ - **Reward Structure**:
36
+ - +1.0 for successful fix
37
+ - -0.2 to -0.5 for failed attempts
38
+ - +0.1 for analysis actions
39
+ - -1.0 for premature submission
40
+ - **Episode Length**: Max 5 attempts per bug
41
+
42
+ ## 🐞 Bug Types Included
43
+
44
+ 1. **Argument Count Errors** - Wrong number of function arguments
45
+ 2. **Logic Errors** - Incorrect loop variables and conditions
46
+ 3. **Exception Handling** - Missing error handling for edge cases
47
+ 4. **Index Errors** - Array/string index out of bounds
48
+ 5. **Infinite Recursion** - Recursive calls without base case reduction
49
+ 6. **Type Errors** - String/integer concatenation issues
50
+ 7. **Key Errors** - Missing dictionary keys
51
+
52
+ ## 🚀 Quick Start
53
+
54
+ ### Using Docker (Recommended)
55
+
56
+ ```python
57
+ from code_debug_env.client import DebugEnv
58
+
59
+ # Automatically starts Docker container and connects
60
+ env = DebugEnv.from_hub("openenv/code-debug-env")
61
+
62
+ # Reset to get first challenge
63
+ result = env.reset()
64
+ print(result.observation.buggy_code)
65
+ print(f"Expected output: {result.observation.expected_output}")
66
+
67
+ # Take action
68
+ from code_debug_env.models import DebugAction
69
+ action = DebugAction(action_type="test")
70
+ result = env.step(action)
71
+ print(f"Reward: {result.reward}")
72
+
73
+ # Cleanup
74
+ env.close()
75
+ ```
76
+
77
+ ## 🔧 Integration with RL Frameworks
78
+
79
+ ### With TRL (Transformer Reinforcement Learning)
80
+
81
+ ```python
82
+ from trl import OnlineDPOConfig, OnlineDPOTrainer
83
+ from code_debug_env.client import DebugEnv
84
+
85
+ config = OnlineDPOConfig(...)
86
+ trainer = OnlineDPOTrainer(
87
+ config=config,
88
+ env=DebugEnv.from_hub("openenv/code-debug-env"),
89
+ # ... other args
90
+ )
91
+ trainer.train()
92
+ ```
93
+
94
+ ## 🏆 OpenEnv Challenge Submission
95
+
96
+ This environment is submitted to the **OpenEnv Challenge: SOTA Environments to Drive General Intelligence** (UC Berkeley AgentBeats Competition).
97
+
98
+ ## 📜 License
99
+
100
+ Apache 2.0
client.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WebSocket client for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from openenv.core.env_client import EnvClient
6
+ from openenv.core.client_types import StepResult, ResetResult
7
+ from .models import DebugAction, DebugObservation, DebugState
8
+
9
+
10
class DebugEnv(EnvClient[DebugAction, DebugObservation, DebugState]):
    """Typed client for the Code Debugging Challenge environment server."""

    def _step_payload(self, action: DebugAction) -> dict:
        """Serialize an action into the JSON body sent with a step request."""
        return {"action_type": action.action_type, "content": action.content}

    def _parse_result(self, data: dict) -> StepResult[DebugObservation]:
        """Turn a raw step response into a typed StepResult."""
        raw = data["observation"]
        # attempts_remaining and the code fields are mandatory in a step
        # response; the rest are optional and fall back to their defaults.
        obs = DebugObservation(
            buggy_code=raw["buggy_code"],
            expected_output=raw["expected_output"],
            test_inputs=raw.get("test_inputs", []),
            current_output=raw.get("current_output"),
            error_message=raw.get("error_message"),
            attempts_remaining=raw["attempts_remaining"],
            hint=raw.get("hint"),
            success=raw.get("success", False),
        )
        return StepResult(
            observation=obs,
            reward=data["reward"],
            terminated=data["terminated"],
            truncated=data["truncated"],
            info=data.get("info", {}),
        )

    def _parse_reset_result(self, data: dict) -> ResetResult[DebugObservation]:
        """Turn a raw reset response into a typed ResetResult."""
        raw = data["observation"]
        # A reset observation never carries outputs/errors/hints; success is
        # always False at the start of an episode.
        obs = DebugObservation(
            buggy_code=raw["buggy_code"],
            expected_output=raw["expected_output"],
            test_inputs=raw.get("test_inputs", []),
            attempts_remaining=raw.get("attempts_remaining", 5),
            success=False,
        )
        return ResetResult(observation=obs, info=data.get("info", {}))

    def _parse_state(self, data: dict) -> DebugState:
        """Turn a raw state response into a typed DebugState."""
        defaults = {
            "current_problem_index": 0,
            "attempts_made": 0,
            "max_attempts": 5,
            "score": 0.0,
            "solved": False,
            "total_problems": 7,
            "episode_id": "",
        }
        return DebugState(
            **{key: data.get(key, fallback) for key, fallback in defaults.items()}
        )
models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data models for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional, Literal
7
+ from openenv.core.env_server import Action, Observation, State
8
+
9
+
10
+ @dataclass
11
+ class DebugAction(Action):
12
+ """Actions the agent can take in the debugging environment."""
13
+
14
+ action_type: Literal["analyze", "fix", "test", "submit"]
15
+ content: Optional[str] = None
16
+
17
+ def __post_init__(self):
18
+ """Validate action consistency."""
19
+ if self.action_type == "fix" and self.content is None:
20
+ raise ValueError("fix action requires content")
21
+
22
+
23
@dataclass
class DebugObservation(Observation):
    """Observations returned to the agent after each step."""

    # The problem's buggy source code as shown to the agent.
    buggy_code: str
    # Exact stdout a correct fix is expected to print.
    expected_output: str
    # Reserved for stdin-style test inputs; presumably empty for current
    # problems — confirm against the problem database.
    test_inputs: list[str] = field(default_factory=list)
    # stdout captured from the most recent code execution, if any.
    current_output: Optional[str] = None
    # Error text from the last action (exception info or protocol error).
    error_message: Optional[str] = None
    # Attempts left before the episode ends.
    attempts_remaining: int = 5
    # Problem hint, revealed only after repeated failed attempts.
    hint: Optional[str] = None
    # True once a submitted fix reproduces the expected output.
    success: bool = False
35
+
36
+
37
@dataclass
class DebugState(State):
    """Internal state tracking for the environment."""

    # Index into the problem database. NOTE(review): appears never updated by
    # the environment (problems are drawn at random) — confirm before relying
    # on it.
    current_problem_index: int = 0
    # Number of steps taken in the current episode.
    attempts_made: int = 0
    # Episode ends after this many attempts.
    max_attempts: int = 5
    # Cumulative reward credit accumulated this episode.
    score: float = 0.0
    # Whether the current problem has been solved.
    solved: bool = False
    # Size of the problem database.
    total_problems: int = 7
    # Unique identifier (UUID string) for the episode.
    episode_id: str = ""
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "code-debug-env"
version = "1.0.0"
description = "OpenEnv environment for training agents to debug Python code"
readme = "README.md"
# Declared minimum interpreter version for the package.
requires-python = ">=3.10"
license = {text = "Apache-2.0"}
keywords = ["openenv", "reinforcement-learning", "debugging", "ai-agents"]

# Runtime dependency: the official OpenEnv framework.
dependencies = [
    "openenv-core>=0.1.1",
]

[project.optional-dependencies]
# Development-only tools (test runner, formatter, linter).
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "black>=23.0.0",
    "ruff>=0.1.0",
]

[build-system]
# NOTE(review): hatchling may need an explicit package/include configuration,
# since the modules (models.py, client.py, server/) sit at the repository
# root — confirm the built wheel actually contains them.
requires = ["hatchling"]
build-backend = "hatchling.build"
server/Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ghcr.io/meta-pytorch/openenv-base:latest AS base

WORKDIR /app

# Copy project metadata and sources, then install the package.
COPY pyproject.toml README.md ./
COPY models.py ./
COPY client.py ./
COPY server/ ./server/

RUN pip install --no-cache-dir -e .

EXPOSE 8000

# Run from /app (NOT /app/server) so `server` is importable as a package:
# server/app.py uses a relative import (`from ..models import ...`) which
# cannot resolve when app.py is launched as a top-level module `app`.
# NOTE(review): the relative import still requires the modules to live inside
# an installed parent package — confirm the packaging layout matches.
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
server/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Code Debugging Challenge environment server."""
2
+
3
+ from .debug_environment import DebugEnvironment
4
+
5
+ __all__ = ["DebugEnvironment"]
server/app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI server for Code Debugging Challenge environment.
3
+ """
4
+
5
+ from openenv.core.env_server import create_app
6
+ from ..models import DebugAction, DebugObservation
7
+ from .debug_environment import DebugEnvironment
8
+
9
+
10
def create_debug_environment():
    """Factory invoked by the OpenEnv server for each new environment instance."""
    return DebugEnvironment()


# Module-level ASGI app so uvicorn can locate it via an import string.
# create_app wires the factory plus the action/observation types into the
# OpenEnv server protocol.
# NOTE(review): this module imports models via a relative `..models` import;
# that only resolves when `server` is a subpackage of an installed parent
# package — confirm the packaging matches how the Dockerfile launches uvicorn.
app = create_app(
    create_debug_environment,
    DebugAction,
    DebugObservation,
    env_name="code_debug_env"
)


if __name__ == "__main__":
    # Convenience entry point for running the server without Docker.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
server/debug_environment.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core environment logic for Code Debugging Challenge.
3
+ """
4
+
5
+ import uuid
6
+ import random
7
+ import sys
8
+ from io import StringIO
9
+ from typing import Optional
10
+ from openenv.core.env_server import Environment
11
+ from ..models import DebugAction, DebugObservation, DebugState
12
+
13
+
14
# Problem bank for the environment. Each entry is a dict with:
#   buggy_code       - snippet shown to the agent
#   fixed_code       - a known-good reference solution (used by the tests)
#   expected_output  - exact stdout (compared after .strip()) of a correct fix
#   test_inputs      - reserved for stdin-style inputs; empty for all entries
#   hint             - revealed to the agent after repeated failed attempts
#   bug_type         - category label for the kind of bug
# NOTE(review): the embedded snippets appear whitespace-collapsed by the diff
# viewer (single-space indents) — verify their indentation in the actual file.
BUG_DATABASE = [
    # 1. argument_count: call site passes too few arguments.
    {
        "buggy_code": "def add_numbers(a, b):\n return a + b\n\nresult = add_numbers(5)\nprint(result)",
        "fixed_code": "def add_numbers(a, b):\n return a + b\n\nresult = add_numbers(5, 3)\nprint(result)",
        "expected_output": "8",
        "test_inputs": [],
        "hint": "Function is called with wrong number of arguments",
        "bug_type": "argument_count"
    },
    # 2. logic_error: loop sums indices instead of list elements.
    {
        "buggy_code": "numbers = [1, 2, 3, 4, 5]\ntotal = 0\nfor i in range(len(numbers)):\n total += i\nprint(total)",
        "fixed_code": "numbers = [1, 2, 3, 4, 5]\ntotal = 0\nfor i in range(len(numbers)):\n total += numbers[i]\nprint(total)",
        "expected_output": "15",
        "test_inputs": [],
        "hint": "Loop variable is not being used correctly",
        "bug_type": "logic_error"
    },
    # 3. exception_handling: division by zero is not guarded.
    {
        "buggy_code": "def divide(a, b):\n return a / b\n\nprint(divide(10, 0))",
        "fixed_code": "def divide(a, b):\n if b == 0:\n return 'Error: Division by zero'\n return a / b\n\nprint(divide(10, 0))",
        "expected_output": "Error: Division by zero",
        "test_inputs": [],
        "hint": "Need to handle edge case when dividing by zero",
        "bug_type": "exception_handling"
    },
    # 4. index_error: string index past the end.
    {
        "buggy_code": "text = 'Hello World'\nprint(text[100])",
        "fixed_code": "text = 'Hello World'\nif len(text) > 100:\n print(text[100])\nelse:\n print('Index out of range')",
        "expected_output": "Index out of range",
        "test_inputs": [],
        "hint": "Index is out of bounds for the string",
        "bug_type": "index_error"
    },
    # 5. infinite_recursion: recursive call never shrinks its argument.
    {
        "buggy_code": "def factorial(n):\n if n == 0:\n return 1\n return n * factorial(n)\n\nprint(factorial(5))",
        "fixed_code": "def factorial(n):\n if n == 0:\n return 1\n return n * factorial(n - 1)\n\nprint(factorial(5))",
        "expected_output": "120",
        "test_inputs": [],
        "hint": "Recursive call is not reducing the problem size",
        "bug_type": "infinite_recursion"
    },
    # 6. type_error: str + int concatenation.
    {
        "buggy_code": "name = 'Alice'\nage = 25\nprint('My name is ' + name + ' and I am ' + age + ' years old')",
        "fixed_code": "name = 'Alice'\nage = 25\nprint('My name is ' + name + ' and I am ' + str(age) + ' years old')",
        "expected_output": "My name is Alice and I am 25 years old",
        "test_inputs": [],
        "hint": "Cannot concatenate string and integer directly",
        "bug_type": "type_error"
    },
    # 7. key_error: lookup of a missing dictionary key.
    {
        "buggy_code": "my_dict = {'a': 1, 'b': 2}\nprint(my_dict['c'])",
        "fixed_code": "my_dict = {'a': 1, 'b': 2}\nprint(my_dict.get('c', 'Key not found'))",
        "expected_output": "Key not found",
        "test_inputs": [],
        "hint": "Key does not exist in dictionary",
        "bug_type": "key_error"
    },
]
73
+
74
+
75
class DebugEnvironment(Environment):
    """Code Debugging Challenge Environment.

    Serves one randomly chosen buggy Python snippet per episode and scores
    the agent's attempts to analyze, test, fix, and submit a solution.
    """

    supports_concurrent_sessions = True

    def __init__(self):
        super().__init__()
        self._state = DebugState(
            episode_id=str(uuid.uuid4()),
            total_problems=len(BUG_DATABASE)
        )
        # Populated by reset(); step() is invalid until then.
        self.current_problem = None

    def reset(self) -> DebugObservation:
        """Start a fresh episode with a new random problem."""
        self._state = DebugState(
            episode_id=str(uuid.uuid4()),
            total_problems=len(BUG_DATABASE)
        )
        self.current_problem = random.choice(BUG_DATABASE)
        return self._make_observation(attempts_remaining=self._state.max_attempts)

    def step(self, action: DebugAction) -> DebugObservation:
        """Execute one step; every action (even analyze) consumes an attempt."""
        if self.current_problem is None:
            # Fail loudly instead of the obscure TypeError that subscripting
            # None would otherwise raise deep inside a handler.
            raise RuntimeError("reset() must be called before step()")

        self._state.attempts_made += 1

        if action.action_type == "analyze":
            return self._handle_analyze()
        if action.action_type == "fix":
            return self._handle_fix(action.content)
        if action.action_type == "test":
            return self._handle_test()
        if action.action_type == "submit":
            return self._handle_submit()
        return self._make_observation(error_message="Invalid action type")

    def _make_observation(self, **overrides) -> DebugObservation:
        """Build an observation for the current problem.

        Defaults describe an ordinary in-progress step; keyword overrides
        replace individual fields (output, errors, hint, success, ...).
        """
        fields = {
            "buggy_code": self.current_problem["buggy_code"],
            "expected_output": self.current_problem["expected_output"],
            "test_inputs": self.current_problem.get("test_inputs", []),
            "attempts_remaining": self._state.max_attempts - self._state.attempts_made,
            "success": False,
        }
        fields.update(overrides)
        return DebugObservation(**fields)

    def _handle_analyze(self) -> DebugObservation:
        """Analyze action: re-present the problem without running anything."""
        return self._make_observation()

    def _handle_fix(self, code_fix: Optional[str]) -> DebugObservation:
        """Fix action: execute the proposed code and compare its output."""
        if code_fix is None:
            return self._make_observation(error_message="No fix provided")

        output, error = self._execute_code(code_fix)
        # Reveal the hint only after the agent has used at least two attempts.
        hint = self.current_problem["hint"] if self._state.attempts_made >= 2 else None

        if error:
            return self._make_observation(
                current_output=output,
                error_message=error,
                hint=hint,
            )

        if output.strip() == self.current_problem["expected_output"].strip():
            self._state.solved = True
            self._state.score += 1.0
            return self._make_observation(current_output=output, success=True)

        return self._make_observation(
            current_output=output,
            error_message=f"Output mismatch. Got: {output.strip()}, Expected: {self.current_problem['expected_output'].strip()}",
            hint=hint,
        )

    def _handle_test(self) -> DebugObservation:
        """Test action: run the buggy code so the agent can see the failure."""
        output, error = self._execute_code(self.current_problem["buggy_code"])
        return self._make_observation(current_output=output, error_message=error)

    def _handle_submit(self) -> DebugObservation:
        """Submit action: give up early; attempts_remaining=0 marks it."""
        return self._make_observation(attempts_remaining=0)

    def _execute_code(self, code: str) -> tuple[str, Optional[str]]:
        """Execute *code*, capturing stdout.

        Returns (captured_output, error) where error is None on success or
        "ExceptionType: message" when the code raised.

        SECURITY: exec() runs agent-supplied code with full interpreter
        privileges; the surrounding Docker container is the only sandbox.
        """
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            exec(code, {})
            output = sys.stdout.getvalue()
            error = None
        except Exception as e:
            output = sys.stdout.getvalue()
            error = f"{type(e).__name__}: {str(e)}"
        finally:
            sys.stdout = old_stdout
        return output, error

    @property
    def state(self) -> DebugState:
        """Return current environment state."""
        return self._state

    def reward(self, observation: DebugObservation) -> float:
        """Compute reward based on observation.

        Specific cases are checked before generic ones. In the previous
        ordering two branches were unreachable: "No fix provided" (-0.5) was
        shadowed by the generic error check (-0.3), and a premature submit
        observation (no output, no error, attempts_remaining=0) fell into the
        +0.1 analyze branch instead of the documented -1.0 penalty.
        """
        if observation.success:
            return 1.0
        if observation.error_message == "No fix provided":
            return -0.5
        # Failed fix whose error text is not an expected "Error: ..." output.
        if observation.error_message and "Error:" not in observation.error_message:
            return -0.3
        # Code ran and produced (wrong) output.
        if observation.current_output and not observation.success:
            return -0.2
        # Premature submission, or attempts exhausted without a solution.
        if observation.attempts_remaining == 0 and not observation.success:
            return -1.0
        # Quiet informational step (analysis).
        if observation.current_output is None and observation.error_message is None:
            return 0.1
        return 0.0

    def terminated(self, observation: DebugObservation) -> bool:
        """Episode terminates on success or max attempts."""
        return observation.success or self._state.attempts_made >= self._state.max_attempts

    def truncated(self, observation: DebugObservation) -> bool:
        """Episode is truncated if max attempts reached without success."""
        return (self._state.attempts_made >= self._state.max_attempts
                and not observation.success)
tests/test_environment.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for Code Debugging Challenge environment."""
2
+
3
+ import pytest
4
+ from server.debug_environment import DebugEnvironment
5
+ from models import DebugAction
6
+
7
+
8
def test_environment_reset():
    """A fresh reset yields an unsolved problem with all attempts available."""
    environment = DebugEnvironment()
    observation = environment.reset()

    assert observation.buggy_code is not None
    assert observation.expected_output is not None
    assert observation.attempts_remaining == 5
    assert not observation.success
17
+
18
+
19
def test_analyze_action():
    """Analyzing earns the small positive reward and keeps the episode open."""
    environment = DebugEnvironment()
    environment.reset()

    observation = environment.step(DebugAction(action_type="analyze"))

    assert environment.reward(observation) == 0.1
    assert not environment.terminated(observation)
30
+
31
+
32
def test_successful_fix():
    """Submitting the known-good reference fix solves and ends the episode."""
    environment = DebugEnvironment()
    environment.reset()

    reference_fix = environment.current_problem["fixed_code"]
    observation = environment.step(
        DebugAction(action_type="fix", content=reference_fix)
    )

    assert observation.success
    assert environment.reward(observation) == 1.0
    assert environment.terminated(observation)
45
+
46
+
47
# Allow running this file directly (`python test_environment.py`) in addition
# to discovery via `pytest`.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])