Spaces:

SolusOps
/

tracefix_rl

Sleeping

App Files Community

databoysu committed on Apr 8

Commit

9882200

1 Parent(s): 100d601

SWE-Gym v1

Browse files

Files changed (19) hide show

Dockerfile +9 -10
README.md +53 -17
__init__.py +3 -2
client.py +9 -5
inference.py +11 -7
models.py +1 -1
openenv.yaml +1 -1
openenv_python_debugging_gym.egg-info/PKG-INFO +0 -11
openenv_python_debugging_gym.egg-info/SOURCES.txt +0 -19
openenv_python_debugging_gym.egg-info/dependency_links.txt +0 -1
openenv_python_debugging_gym.egg-info/entry_points.txt +0 -2
openenv_python_debugging_gym.egg-info/requires.txt +0 -7
openenv_python_debugging_gym.egg-info/top_level.txt +0 -1
pyproject.toml +6 -6
server/Dockerfile +0 -79
server/__init__.py +3 -3
server/app.py +5 -5
server/requirements.txt +0 -7
server/{my_env_environment.py → swe_gym_environment.py} +2 -2

Dockerfile CHANGED Viewed

@@ -23,18 +23,10 @@ RUN if ! command -v uv >/dev/null 2>&1; then \
     fi
 RUN --mount=type=cache,target=/root/.cache/uv \
-    if [ -f uv.lock ]; then \
-        uv sync --frozen --no-install-project --no-editable; \
-    else \
-        uv sync --no-install-project --no-editable; \
-    fi
 RUN --mount=type=cache,target=/root/.cache/uv \
-    if [ -f uv.lock ]; then \
-        uv sync --frozen --no-editable; \
-    else \
-        uv sync --no-editable; \
-    fi
 FROM ${BASE_IMAGE}
@@ -43,10 +35,17 @@ WORKDIR /app
 COPY --from=builder /app/env/.venv /app/.venv
 COPY --from=builder /app/env /app/env
 ENV PATH="/app/.venv/bin:$PATH"
 ENV PYTHONPATH="/app/env:$PYTHONPATH"
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
 CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 7860"]

     fi
 RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --no-install-project --no-editable
 RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --no-editable
 FROM ${BASE_IMAGE}
 COPY --from=builder /app/env/.venv /app/.venv
 COPY --from=builder /app/env /app/env
+# Ported from RL_ENV Dockerfile hardening:
+# run the HF Space as uid 1000 rather than root.
+RUN useradd -m -u 1000 appuser && \
+    chown -R appuser:appuser /app
 ENV PATH="/app/.venv/bin:$PATH"
 ENV PYTHONPATH="/app/env:$PYTHONPATH"
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
+USER appuser
 CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 7860"]

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Python Debugging Gym
-emoji: 🐛
 colorFrom: blue
 colorTo: cyan
 sdk: docker
@@ -10,27 +10,40 @@ base_path: /web
 tags:
   - openenv
   - reinforcement-learning
-  - code-generation
 ---
-# Python Debugging Gym
-An OpenEnv-compatible RL environment where agents debug broken Python code by
-iteratively viewing, editing, and testing code snippets until all tests pass.
-## Environment Overview
 - Action space:
 `VIEW_CODE`, `RUN_TESTS`, `REPLACE_LINES`, `UNDO_EDIT`, `RESET_TO_ORIGINAL`, `SUBMIT`
-- Observation includes:
-`code_lines`, `localized_context`, `last_execution_output`, `syntax_error`, `test_results`
-- Dense reward with step cost and final score on submit.
 ## Local Run
 ```bash
 uv sync
-uv run --project . server --port 7860
 ```
 Server endpoints:
@@ -38,18 +51,41 @@ Server endpoints:
 - `POST /step`
 - `GET /health`
 - `WS /ws`
-- `GET /web` (OpenEnv web UI)
-## Deploy to Hugging Face Spaces
 ```bash
-openenv push
 ```
-## Validate Submission
-From repo:
 ```bash
-./pre-val.sh https://<your-space>.hf.space .
 ```

 ---
+title: SWE-Gym - Software Engineer Gym
+emoji: 🧑‍💻
 colorFrom: blue
 colorTo: cyan
 sdk: docker
 tags:
   - openenv
   - reinforcement-learning
+  - software-engineering
 ---
+# SWE-Gym - Software Engineer Gym
+SWE-Gym is an OpenEnv-compatible RL environment where an agent must debug broken
+Python code by iteratively inspecting source, running tests, editing lines, and
+submitting once all tests pass.
+## Core Design
 - Action space:
 `VIEW_CODE`, `RUN_TESTS`, `REPLACE_LINES`, `UNDO_EDIT`, `RESET_TO_ORIGINAL`, `SUBMIT`
+- Observation includes full code, localized edit context, execution output, syntax status, and per-test outcomes.
+- Dense rewards:
+`RUN_TESTS` bonus, per-test progress bonus, step-cost penalty, invalid-edit penalties, and final clamped score in `[0, 1]`.
+- Curriculum-ready task sampling:
+easy/medium/hard buckets with safe random fallback for evaluator runs.
+## Environment Files
+- `models.py`: action/observation schemas
+- `tasks.py`: static curated task registry
+- `sandbox.py`: isolated timed execution
+- `environment.py`: reset/step/reward logic
+- `context.py`: localized code windowing
+- `server/app.py`: FastAPI OpenEnv server entry
+- `inference.py`: baseline OpenAI-client inference script
 ## Local Run
 ```bash
 uv sync
+uv run --project . server
 ```
 Server endpoints:
 - `POST /step`
 - `GET /health`
 - `WS /ws`
+- `GET /web`
+## Inference Script Compliance
+- Script location/name: `inference.py` at repo root.
+- Uses OpenAI client with:
+`API_BASE_URL`, `MODEL_NAME`, `HF_TOKEN`.
+- Supports local container mode with:
+`LOCAL_IMAGE_NAME`.
+- Emits standardized stdout lines in this order:
+`[START]`, one or more `[STEP]`, then `[END]`.
+- Final score is clamped to `[0, 1]`.
+## Docker + Deployment
+Build locally:
 ```bash
+docker build -t swe-gym:test -f Dockerfile .
+```
+Run locally:
+```bash
+docker run --rm -p 7860:7860 swe-gym:test
 ```
+Deploy to Hugging Face:
+```bash
+openenv push
+```
+Validate before submit:
 ```bash
+bash ./pre-val.sh https://<your-space>.hf.space .
 ```

__init__.py CHANGED Viewed

@@ -4,14 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""Python Debugging Gym OpenEnv package."""
-from .client import MyEnv
 from .models import CodeAction, CodeObservation, TestResult
 __all__ = [
     "CodeAction",
     "CodeObservation",
     "TestResult",
     "MyEnv",
 ]

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""SWE-Gym OpenEnv package."""
+from .client import MyEnv, SWEGymEnv
 from .models import CodeAction, CodeObservation, TestResult
 __all__ = [
     "CodeAction",
     "CodeObservation",
     "TestResult",
+    "SWEGymEnv",
     "MyEnv",
 ]

client.py CHANGED Viewed

@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""Client for the Python Debugging Gym OpenEnv environment."""
 from typing import Dict
@@ -18,11 +18,11 @@ except ImportError:
     from models import CodeAction, CodeObservation, TestResult
-class MyEnv(
     EnvClient[CodeAction, CodeObservation, State]
 ):
     """
-    Client for the My Env Environment.
     This client maintains a persistent WebSocket connection to the environment server,
     enabling efficient multi-step interactions with lower latency.
@@ -30,7 +30,7 @@ class MyEnv(
     Example:
         >>> # Connect to a running server
-        >>> with MyEnv(base_url="http://localhost:8000") as client:
         ...     result = client.reset()
         ...     print(result.observation.echoed_message)
         ...
@@ -39,7 +39,7 @@ class MyEnv(
     Example with Docker:
         >>> # Automatically start container and connect
-        >>> client = MyEnv.from_docker_image("my_env-env:latest")
         >>> try:
         ...     result = client.reset()
         ...     result = client.step(MyAction(message="Test"))
@@ -107,3 +107,7 @@ class MyEnv(
             episode_id=payload.get("episode_id"),
             step_count=payload.get("step_count", 0),
         )

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""Client for the SWE-Gym OpenEnv environment."""
 from typing import Dict
     from models import CodeAction, CodeObservation, TestResult
+class SWEGymEnv(
     EnvClient[CodeAction, CodeObservation, State]
 ):
     """
+    Client for the SWE-Gym environment.
     This client maintains a persistent WebSocket connection to the environment server,
     enabling efficient multi-step interactions with lower latency.
     Example:
         >>> # Connect to a running server
+        >>> with SWEGymEnv(base_url="http://localhost:7860") as client:
         ...     result = client.reset()
         ...     print(result.observation.echoed_message)
         ...
     Example with Docker:
         >>> # Automatically start container and connect
+        >>> client = SWEGymEnv.from_docker_image("swe-gym:latest")
         >>> try:
         ...     result = client.reset()
         ...     result = client.step(MyAction(message="Test"))
             episode_id=payload.get("episode_id"),
             step_count=payload.get("step_count", 0),
         )
+# Backward-compatible alias for older code paths.
+MyEnv = SWEGymEnv

inference.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Inference script for Python Debugging Gym.
 Mandatory env vars expected in deployment config:
   API_BASE_URL
@@ -23,7 +23,11 @@ from typing import Any
 from openai import OpenAI
-from my_env import CodeAction, MyEnv
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
@@ -32,8 +36,8 @@ HF_TOKEN = os.getenv("HF_TOKEN", "")
 LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:7860")
-TASK_NAME = os.getenv("TASK_NAME", "python_debugging_gym")
-BENCHMARK = os.getenv("BENCHMARK", "python_debugging_gym")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
@@ -166,7 +170,7 @@ def _compute_score(step_result: Any, rewards: list[float]) -> float:
 async def main() -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
-    env: MyEnv | None = None
     rewards: list[float] = []
     history: list[str] = []
     steps_taken = 0
@@ -176,9 +180,9 @@ async def main() -> None:
     try:
         if LOCAL_IMAGE_NAME:
-            env = await MyEnv.from_docker_image(LOCAL_IMAGE_NAME)
         else:
-            env = MyEnv(base_url=ENV_BASE_URL)
         result = await env.reset()
         task_name = result.observation.info.get("task_name") or TASK_NAME

 """
+Inference script for SWE-Gym - Software Engineer Gym.
 Mandatory env vars expected in deployment config:
   API_BASE_URL
 from openai import OpenAI
+try:
+    from swe_gym import CodeAction, SWEGymEnv
+except ImportError:
+    from client import SWEGymEnv
+    from models import CodeAction
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:7860")
+TASK_NAME = os.getenv("TASK_NAME", "swe_gym")
+BENCHMARK = os.getenv("BENCHMARK", "swe_gym")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
 async def main() -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
+    env: SWEGymEnv | None = None
     rewards: list[float] = []
     history: list[str] = []
     steps_taken = 0
     try:
         if LOCAL_IMAGE_NAME:
+            env = await SWEGymEnv.from_docker_image(LOCAL_IMAGE_NAME)
         else:
+            env = SWEGymEnv(base_url=ENV_BASE_URL)
         result = await env.reset()
         task_name = result.observation.info.get("task_name") or TASK_NAME

models.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Pydantic schema layer for the Python Debugging Gym OpenEnv environment."""
 from __future__ import annotations


1	+ """Pydantic schema layer for SWE-Gym - Software Engineer Gym."""
2
3	from __future__ import annotations
4

openenv.yaml CHANGED Viewed

@@ -1,5 +1,5 @@
 spec_version: 1
-name: python_debugging_gym
 type: space
 runtime: fastapi
 app: server.app:app

 spec_version: 1
+name: swe_gym
 type: space
 runtime: fastapi
 app: server.app:app

openenv_python_debugging_gym.egg-info/PKG-INFO DELETED Viewed

@@ -1,11 +0,0 @@
-Metadata-Version: 2.4
-Name: openenv-python-debugging-gym
-Version: 0.1.0
-Summary: Python Debugging Gym environment for OpenEnv
-Requires-Python: >=3.10
-Requires-Dist: openenv-core[core]>=0.2.2
-Requires-Dist: openai>=1.30.0
-Requires-Dist: websockets>=12.0
-Provides-Extra: dev
-Requires-Dist: pytest>=8.0.0; extra == "dev"
-Requires-Dist: pytest-cov>=4.0.0; extra == "dev"

openenv_python_debugging_gym.egg-info/SOURCES.txt DELETED Viewed

@@ -1,19 +0,0 @@
-README.md
-pyproject.toml
-./__init__.py
-./client.py
-./context.py
-./environment.py
-./inference.py
-./models.py
-./sandbox.py
-./tasks.py
-openenv_python_debugging_gym.egg-info/PKG-INFO
-openenv_python_debugging_gym.egg-info/SOURCES.txt
-openenv_python_debugging_gym.egg-info/dependency_links.txt
-openenv_python_debugging_gym.egg-info/entry_points.txt
-openenv_python_debugging_gym.egg-info/requires.txt
-openenv_python_debugging_gym.egg-info/top_level.txt
-server/__init__.py
-server/app.py
-server/my_env_environment.py

openenv_python_debugging_gym.egg-info/dependency_links.txt DELETED Viewed

	@@ -1 +0,0 @@
1	-

openenv_python_debugging_gym.egg-info/entry_points.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- [console_scripts]
2	- server = my_env.server.app:main

openenv_python_debugging_gym.egg-info/requires.txt DELETED Viewed

@@ -1,7 +0,0 @@
-openenv-core[core]>=0.2.2
-openai>=1.30.0
-websockets>=12.0
-[dev]
-pytest>=8.0.0
-pytest-cov>=4.0.0

openenv_python_debugging_gym.egg-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- my_env

pyproject.toml CHANGED Viewed

@@ -9,9 +9,9 @@ requires = ["setuptools>=45", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
-name = "openenv-python-debugging-gym"
 version = "0.1.0"
-description = "Python Debugging Gym environment for OpenEnv"
 requires-python = ">=3.10"
 dependencies = [
     # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
@@ -38,10 +38,10 @@ dev = [
 [project.scripts]
 # Server entry point - enables running via: uv run --project . server
-# or: python -m my_env.server.app
-server = "my_env.server.app:main"
 [tool.setuptools]
 include-package-data = true
-packages = ["my_env", "my_env.server"]
-package-dir = { "my_env" = ".", "my_env.server" = "server" }

 build-backend = "setuptools.build_meta"
 [project]
+name = "openenv-swe-gym"
 version = "0.1.0"
+description = "SWE-Gym - Software Engineer Gym environment for OpenEnv"
 requires-python = ">=3.10"
 dependencies = [
     # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
 [project.scripts]
 # Server entry point - enables running via: uv run --project . server
+# or: python -m swe_gym.server.app
+server = "swe_gym.server.app:main"
 [tool.setuptools]
 include-package-data = true
+packages = ["swe_gym", "swe_gym.server"]
+package-dir = { "swe_gym" = ".", "swe_gym.server" = "server" }

server/Dockerfile DELETED Viewed

@@ -1,79 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-# Multi-stage build using openenv-base
-# This Dockerfile is flexible and works for both:
-# - In-repo environments (with local OpenEnv sources)
-# - Standalone environments (with openenv from PyPI/Git)
-# The build script (openenv build) handles context detection and sets appropriate build args.
-ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
-FROM ${BASE_IMAGE} AS builder
-WORKDIR /app
-# Ensure git is available (required for installing dependencies from VCS)
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends git && \
-    rm -rf /var/lib/apt/lists/*
-# Build argument to control whether we're building standalone or in-repo
-ARG BUILD_MODE=standalone
-# Copy environment code (always at root of build context)
-COPY . /app/env
-# For in-repo builds, openenv is already vendored in the build context
-# For standalone builds, openenv will be installed via pyproject.toml
-WORKDIR /app/env
-# Ensure uv is available (for local builds where base image lacks it)
-RUN if ! command -v uv >/dev/null 2>&1; then \
-        curl -LsSf https://astral.sh/uv/install.sh | sh && \
-        mv /root/.local/bin/uv /usr/local/bin/uv && \
-        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
-    fi
-# Install dependencies using uv sync
-# If uv.lock exists, use it; otherwise resolve on the fly
-RUN --mount=type=cache,target=/root/.cache/uv \
-    if [ -f uv.lock ]; then \
-        uv sync --frozen --no-install-project --no-editable; \
-    else \
-        uv sync --no-install-project --no-editable; \
-    fi
-RUN --mount=type=cache,target=/root/.cache/uv \
-    if [ -f uv.lock ]; then \
-        uv sync --frozen --no-editable; \
-    else \
-        uv sync --no-editable; \
-    fi
-# Final runtime stage
-FROM ${BASE_IMAGE}
-WORKDIR /app
-# Copy the virtual environment from builder
-COPY --from=builder /app/env/.venv /app/.venv
-# Copy the environment code
-COPY --from=builder /app/env /app/env
-# Set PATH to use the virtual environment
-ENV PATH="/app/.venv/bin:$PATH"
-# Set PYTHONPATH so imports work correctly
-ENV PYTHONPATH="/app/env:$PYTHONPATH"
-# Health check
-HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:7860/health || exit 1
-# Run the FastAPI server
-# The module path is constructed to work with the /app/env structure
-CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 7860"]

server/__init__.py CHANGED Viewed

@@ -4,8 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""My Env environment server components."""
-from .my_env_environment import MyEnvironment
-__all__ = ["MyEnvironment"]

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""SWE-Gym server components."""
+from .swe_gym_environment import SWEGymEnvironment
+__all__ = ["SWEGymEnvironment"]

server/app.py CHANGED Viewed

@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""FastAPI entry point for the Python Debugging Gym OpenEnv environment."""
 try:
     from openenv.core.env_server.http_server import create_app
@@ -15,22 +15,22 @@ except Exception as e:  # pragma: no cover
 try:
     from ..models import CodeAction, CodeObservation
-    from .my_env_environment import MyEnvironment
 except ImportError:
     import sys
     from pathlib import Path
     sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
     from models import CodeAction, CodeObservation
-    from server.my_env_environment import MyEnvironment
 # Create the app with web interface and README integration
 app = create_app(
-    MyEnvironment,
     CodeAction,
     CodeObservation,
-    env_name="python_debugging_gym",
     max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
 )

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""FastAPI entry point for SWE-Gym - Software Engineer Gym."""
 try:
     from openenv.core.env_server.http_server import create_app
 try:
     from ..models import CodeAction, CodeObservation
+    from .swe_gym_environment import SWEGymEnvironment
 except ImportError:
     import sys
     from pathlib import Path
     sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
     from models import CodeAction, CodeObservation
+    from server.swe_gym_environment import SWEGymEnvironment
 # Create the app with web interface and README integration
 app = create_app(
+    SWEGymEnvironment,
     CodeAction,
     CodeObservation,
+    env_name="swe_gym",
     max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
 )

server/requirements.txt DELETED Viewed

@@ -1,7 +0,0 @@
-openenv[core]>=0.2.0
-fastapi>=0.115.0
-uvicorn>=0.24.0
-openai>=1.30.0
-websockets>=12.0

server/{my_env_environment.py → swe_gym_environment.py} RENAMED Viewed

@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""OpenEnv adapter around the PythonDebuggingGym core environment."""
 from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import State
@@ -17,7 +17,7 @@ except ImportError:
     from models import CodeAction, CodeObservation
-class MyEnvironment(Environment):
     """Environment implementation compatible with OpenEnv's server interface."""
     # Enable concurrent WebSocket sessions.

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""OpenEnv adapter around the SWE-Gym core environment."""
 from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import State
     from models import CodeAction, CodeObservation
+class SWEGymEnvironment(Environment):
     """Environment implementation compatible with OpenEnv's server interface."""
     # Enable concurrent WebSocket sessions.