Spaces:
Sleeping
Sleeping
databoysu committed on
Commit ·
49d9b3d
1
Parent(s): dcdd52f
clamp logic fix
Browse files
- .dockerignore +3 -1
- .gitignore +3 -1
- .hfignore +3 -0
- Dockerfile +1 -1
- README.md +2 -2
- backend/__init__.py +0 -5
- backend/app.py +0 -57
- backend/tracefix_rl_environment.py +0 -67
- core/environment.py +3 -3
- inference.py +2 -2
- openenv.yaml +1 -1
- pyproject.toml +4 -4
- vision_ui.py +1 -1
.dockerignore
CHANGED
|
@@ -29,4 +29,6 @@ outputs/
|
|
| 29 |
|
| 30 |
CLAUDE.md
|
| 31 |
package.json
|
| 32 |
-
package-lock.json
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
CLAUDE.md
|
| 31 |
package.json
|
| 32 |
+
package-lock.json
|
| 33 |
+
openenv_tracefix_rl.egg-info/
|
| 34 |
+
test_redirect.py
|
.gitignore
CHANGED
|
@@ -12,4 +12,6 @@ package-lock.json
|
|
| 12 |
.agent/
|
| 13 |
**/__pycache__/
|
| 14 |
**/.venv/
|
| 15 |
-
**/node_modules/
|
|
|
|
|
|
|
|
|
| 12 |
.agent/
|
| 13 |
**/__pycache__/
|
| 14 |
**/.venv/
|
| 15 |
+
**/node_modules/
|
| 16 |
+
openenv_tracefix_rl.egg-info/
|
| 17 |
+
test_redirect.py
|
.hfignore
CHANGED
|
@@ -33,3 +33,6 @@ build/
|
|
| 33 |
outputs/
|
| 34 |
*.log
|
| 35 |
.github/
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
outputs/
|
| 34 |
*.log
|
| 35 |
.github/
|
| 36 |
+
|
| 37 |
+
openenv_tracefix_rl.egg-info/
|
| 38 |
+
test_redirect.py
|
Dockerfile
CHANGED
|
@@ -52,4 +52,4 @@ WORKDIR /app/env
|
|
| 52 |
|
| 53 |
USER appuser
|
| 54 |
|
| 55 |
-
CMD ["uvicorn", "
|
|
|
|
| 52 |
|
| 53 |
USER appuser
|
| 54 |
|
| 55 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -54,7 +54,7 @@ Every task contains: `name`, `description`, `difficulty`, `bug_type`, `code` (bu
|
|
| 54 |
|
| 55 |
This environment enforces strict typing and uses standard modern tooling:
|
| 56 |
- **`uv`:** Handles dependency management (see `pyproject.toml`).
|
| 57 |
-
- **FastAPI:** Provides the `
|
| 58 |
- **Pydantic (v2):** Provides strong validation layers for `models.py` (e.g., `CodeAction`, `CodeObservation`).
|
| 59 |
- **OpenEnv Config:** See `openenv.yaml` which specifies `tracefix_rl` to run the FastAPI app on port `7860`.
|
| 60 |
|
|
@@ -63,7 +63,7 @@ This environment enforces strict typing and uses standard modern tooling:
|
|
| 63 |
- `tasks.py`: Task metadata definitions.
|
| 64 |
- `sandbox.py`: Subprocess runtime and output tracking.
|
| 65 |
- `environment.py`: Reset/step/reward core RL loop logic (`TraceFixRLGym`).
|
| 66 |
-
- `
|
| 67 |
- `inference.py`: Baseline OpenAI-client inference script to evaluate agents.
|
| 68 |
|
| 69 |
## Local Development
|
|
|
|
| 54 |
|
| 55 |
This environment enforces strict typing and uses standard modern tooling:
|
| 56 |
- **`uv`:** Handles dependency management (see `pyproject.toml`).
|
| 57 |
+
- **FastAPI:** Provides the `server.app` integration layer for OpenEnv compliance.
|
| 58 |
- **Pydantic (v2):** Provides strong validation layers for `models.py` (e.g., `CodeAction`, `CodeObservation`).
|
| 59 |
- **OpenEnv Config:** See `openenv.yaml` which specifies `tracefix_rl` to run the FastAPI app on port `7860`.
|
| 60 |
|
|
|
|
| 63 |
- `tasks.py`: Task metadata definitions.
|
| 64 |
- `sandbox.py`: Subprocess runtime and output tracking.
|
| 65 |
- `environment.py`: Reset/step/reward core RL loop logic (`TraceFixRLGym`).
|
| 66 |
+
- `server/tracefix_rl_environment.py` / `server/app.py`: Maps the OpenAI/OpenEnv network interface to the core environment.
|
| 67 |
- `inference.py`: Baseline OpenAI-client inference script to evaluate agents.
|
| 68 |
|
| 69 |
## Local Development
|
backend/__init__.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
"""TraceFix-RL backend components."""
|
| 2 |
-
|
| 3 |
-
from .tracefix_rl_environment import TraceFixRLEnvironment
|
| 4 |
-
|
| 5 |
-
__all__ = ["TraceFixRLEnvironment"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
"""FastAPI entry point for TraceFix-RL."""
|
| 2 |
-
|
| 3 |
-
import gradio as gr
|
| 4 |
-
from vision_ui import demo
|
| 5 |
-
|
| 6 |
-
try:
|
| 7 |
-
from openenv.core.env_server.http_server import create_app
|
| 8 |
-
except Exception as e: # pragma: no cover
|
| 9 |
-
raise ImportError(
|
| 10 |
-
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 11 |
-
) from e
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
from core.models import CodeAction, CodeObservation
|
| 15 |
-
from backend.tracefix_rl_environment import TraceFixRLEnvironment
|
| 16 |
-
except ImportError:
|
| 17 |
-
import sys
|
| 18 |
-
from pathlib import Path
|
| 19 |
-
|
| 20 |
-
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
| 21 |
-
from core.models import CodeAction, CodeObservation
|
| 22 |
-
from backend.tracefix_rl_environment import TraceFixRLEnvironment
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
app = create_app(
|
| 26 |
-
TraceFixRLEnvironment,
|
| 27 |
-
CodeAction,
|
| 28 |
-
CodeObservation,
|
| 29 |
-
env_name="tracefix_rl",
|
| 30 |
-
max_concurrent_envs=1,
|
| 31 |
-
)
|
| 32 |
-
|
| 33 |
-
from fastapi.responses import RedirectResponse
|
| 34 |
-
|
| 35 |
-
@app.get("/", include_in_schema=False)
|
| 36 |
-
async def root_redirect():
|
| 37 |
-
return RedirectResponse(url="/web/")
|
| 38 |
-
|
| 39 |
-
@app.get("/web", include_in_schema=False)
|
| 40 |
-
async def web_no_slash_redirect():
|
| 41 |
-
return RedirectResponse(url="/web/")
|
| 42 |
-
|
| 43 |
-
app = gr.mount_gradio_app(app, demo, path="/web")
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def main() -> None:
|
| 47 |
-
"""Entry point for local and container execution."""
|
| 48 |
-
import os
|
| 49 |
-
import uvicorn
|
| 50 |
-
|
| 51 |
-
host = os.environ.get("HOST", "0.0.0.0")
|
| 52 |
-
port = int(os.environ.get("PORT", "7860"))
|
| 53 |
-
uvicorn.run(app, host=host, port=port)
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
if __name__ == "__main__":
|
| 57 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/tracefix_rl_environment.py
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
"""OpenEnv adapter around the TraceFix-RL core environment."""
|
| 2 |
-
|
| 3 |
-
from openenv.core.env_server.interfaces import Environment
|
| 4 |
-
from openenv.core.env_server.types import State
|
| 5 |
-
|
| 6 |
-
try:
|
| 7 |
-
from core.environment import TraceFixRLGym
|
| 8 |
-
from core.models import CodeAction, CodeObservation
|
| 9 |
-
except ImportError:
|
| 10 |
-
from core.environment import TraceFixRLGym
|
| 11 |
-
from core.models import CodeAction, CodeObservation
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class TraceFixRLEnvironment(Environment):
|
| 15 |
-
"""Environment implementation compatible with OpenEnv's server interface."""
|
| 16 |
-
|
| 17 |
-
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 18 |
-
|
| 19 |
-
def __init__(self):
|
| 20 |
-
self._gym = TraceFixRLGym()
|
| 21 |
-
self._state = State(episode_id="", step_count=0)
|
| 22 |
-
|
| 23 |
-
def reset(self, difficulty: str | None = None, task_name: str | None = None) -> CodeObservation:
|
| 24 |
-
if difficulty == "easy":
|
| 25 |
-
self._gym.training_step = 1
|
| 26 |
-
elif difficulty == "medium":
|
| 27 |
-
self._gym.training_step = 2000
|
| 28 |
-
elif difficulty == "hard":
|
| 29 |
-
self._gym.training_step = 6000
|
| 30 |
-
|
| 31 |
-
task_dict = None
|
| 32 |
-
if task_name and task_name != "tracefix_rl":
|
| 33 |
-
try:
|
| 34 |
-
from tasks.tasks import ALL_TASKS
|
| 35 |
-
for t in ALL_TASKS:
|
| 36 |
-
if t.get("name") == task_name:
|
| 37 |
-
task_dict = t
|
| 38 |
-
break
|
| 39 |
-
except ImportError:
|
| 40 |
-
pass
|
| 41 |
-
|
| 42 |
-
obs, system_prompt = self._gym.reset(task_index=task_dict)
|
| 43 |
-
self._state = State(
|
| 44 |
-
episode_id=obs.info.get("episode_id", ""),
|
| 45 |
-
step_count=obs.step_count,
|
| 46 |
-
)
|
| 47 |
-
metadata = dict(obs.metadata or {})
|
| 48 |
-
metadata["system_prompt"] = system_prompt
|
| 49 |
-
obs.metadata = metadata
|
| 50 |
-
return obs
|
| 51 |
-
|
| 52 |
-
def step(self, action: CodeAction) -> CodeObservation: # type: ignore[override]
|
| 53 |
-
obs, reward, done, info = self._gym.step(action)
|
| 54 |
-
obs.reward = reward
|
| 55 |
-
obs.done = done
|
| 56 |
-
metadata = dict(obs.metadata or {})
|
| 57 |
-
metadata.update(info)
|
| 58 |
-
obs.metadata = metadata
|
| 59 |
-
self._state = State(
|
| 60 |
-
episode_id=obs.info.get("episode_id", ""),
|
| 61 |
-
step_count=obs.step_count,
|
| 62 |
-
)
|
| 63 |
-
return obs
|
| 64 |
-
|
| 65 |
-
@property
|
| 66 |
-
def state(self) -> State:
|
| 67 |
-
return self._state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/environment.py
CHANGED
|
@@ -298,7 +298,7 @@ class TraceFixRLGym:
|
|
| 298 |
total = len(results)
|
| 299 |
passes = 0 if syntax_err else sum(1 for t in results if t.passed)
|
| 300 |
raw = (passes / total if total > 0 else 0.0) - self._accumulated_step_costs
|
| 301 |
-
reward = max(0.
|
| 302 |
self._last_output += (
|
| 303 |
f"\n⚠ Max steps ({MAX_STEPS}) reached. "
|
| 304 |
f"Auto-evaluated: {passes}/{total} tests passing. "
|
|
@@ -314,7 +314,7 @@ class TraceFixRLGym:
|
|
| 314 |
"step": self._step_count,
|
| 315 |
}
|
| 316 |
if self._done:
|
| 317 |
-
info["final_score"] = max(0.
|
| 318 |
|
| 319 |
return obs, round(reward, 4), self._done, info
|
| 320 |
|
|
@@ -467,7 +467,7 @@ class TraceFixRLGym:
|
|
| 467 |
|
| 468 |
proportion = passes / total if total > 0 else 0.0
|
| 469 |
raw_score = proportion - self._accumulated_step_costs
|
| 470 |
-
final_score = max(0.
|
| 471 |
|
| 472 |
if not syntax_err:
|
| 473 |
if passes == total:
|
|
|
|
| 298 |
total = len(results)
|
| 299 |
passes = 0 if syntax_err else sum(1 for t in results if t.passed)
|
| 300 |
raw = (passes / total if total > 0 else 0.0) - self._accumulated_step_costs
|
| 301 |
+
reward = max(0.01, min(0.99, raw))
|
| 302 |
self._last_output += (
|
| 303 |
f"\n⚠ Max steps ({MAX_STEPS}) reached. "
|
| 304 |
f"Auto-evaluated: {passes}/{total} tests passing. "
|
|
|
|
| 314 |
"step": self._step_count,
|
| 315 |
}
|
| 316 |
if self._done:
|
| 317 |
+
info["final_score"] = max(0.01, min(0.99, round(reward, 4)))
|
| 318 |
|
| 319 |
return obs, round(reward, 4), self._done, info
|
| 320 |
|
|
|
|
| 467 |
|
| 468 |
proportion = passes / total if total > 0 else 0.0
|
| 469 |
raw_score = proportion - self._accumulated_step_costs
|
| 470 |
+
final_score = max(0.01, min(0.99, raw_score))
|
| 471 |
|
| 472 |
if not syntax_err:
|
| 473 |
if passes == total:
|
inference.py
CHANGED
|
@@ -46,7 +46,7 @@ ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:7860")
|
|
| 46 |
TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
|
| 47 |
BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
|
| 48 |
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
|
| 49 |
-
SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.
|
| 50 |
|
| 51 |
SYSTEM_PROMPT = """\
|
| 52 |
You are a deterministic debugging policy agent.
|
|
@@ -296,7 +296,7 @@ def _compute_score(step_result: Any, rewards: list[float]) -> float:
|
|
| 296 |
raw = info.get("final_score")
|
| 297 |
if raw is None:
|
| 298 |
raw = sum(rewards)
|
| 299 |
-
return max(0.
|
| 300 |
|
| 301 |
|
| 302 |
async def run(difficulty: Optional[str] = None, show_thought: bool = False) -> None:
|
|
|
|
| 46 |
TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
|
| 47 |
BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
|
| 48 |
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
|
| 49 |
+
SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.98"))
|
| 50 |
|
| 51 |
SYSTEM_PROMPT = """\
|
| 52 |
You are a deterministic debugging policy agent.
|
|
|
|
| 296 |
raw = info.get("final_score")
|
| 297 |
if raw is None:
|
| 298 |
raw = sum(rewards)
|
| 299 |
+
return max(0.01, min(0.99, float(raw)))
|
| 300 |
|
| 301 |
|
| 302 |
async def run(difficulty: Optional[str] = None, show_thought: bool = False) -> None:
|
openenv.yaml
CHANGED
|
@@ -2,5 +2,5 @@ spec_version: 1
|
|
| 2 |
name: tracefix_rl
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
-
app:
|
| 6 |
port: 7860
|
|
|
|
| 2 |
name: tracefix_rl
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
port: 7860
|
pyproject.toml
CHANGED
|
@@ -38,10 +38,10 @@ dev = [
|
|
| 38 |
|
| 39 |
[project.scripts]
|
| 40 |
# Server entry point - enables running via: uv run --project . server
|
| 41 |
-
# or: python -m tracefix_rl.
|
| 42 |
-
server = "tracefix_rl.
|
| 43 |
|
| 44 |
[tool.setuptools]
|
| 45 |
include-package-data = true
|
| 46 |
-
packages = ["tracefix_rl", "tracefix_rl.
|
| 47 |
-
package-dir = { "tracefix_rl" = ".", "tracefix_rl.
|
|
|
|
| 38 |
|
| 39 |
[project.scripts]
|
| 40 |
# Server entry point - enables running via: uv run --project . server
|
| 41 |
+
# or: python -m tracefix_rl.server.app
|
| 42 |
+
server = "tracefix_rl.server.app:main"
|
| 43 |
|
| 44 |
[tool.setuptools]
|
| 45 |
include-package-data = true
|
| 46 |
+
packages = ["tracefix_rl", "tracefix_rl.server", "tracefix_rl.core", "tracefix_rl.tasks"]
|
| 47 |
+
package-dir = { "tracefix_rl" = ".", "tracefix_rl.server" = "server", "tracefix_rl.core" = "core", "tracefix_rl.tasks" = "tasks" }
|
vision_ui.py
CHANGED
|
@@ -512,7 +512,7 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 512 |
max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
|
| 513 |
success_score_threshold = gr.Number(
|
| 514 |
label="Success Score Threshold",
|
| 515 |
-
value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.
|
| 516 |
precision=2,
|
| 517 |
)
|
| 518 |
show_thought = gr.Checkbox(label="Stream Thought Trace", value=False)
|
|
|
|
| 512 |
max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
|
| 513 |
success_score_threshold = gr.Number(
|
| 514 |
label="Success Score Threshold",
|
| 515 |
+
value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.98")),
|
| 516 |
precision=2,
|
| 517 |
)
|
| 518 |
show_thought = gr.Checkbox(label="Stream Thought Trace", value=False)
|