databoysu committed on
Commit
49d9b3d
·
1 Parent(s): dcdd52f

clamp logic fix

Browse files
.dockerignore CHANGED
@@ -29,4 +29,6 @@ outputs/
29
 
30
  CLAUDE.md
31
  package.json
32
- package-lock.json
 
 
 
29
 
30
  CLAUDE.md
31
  package.json
32
+ package-lock.json
33
+ openenv_tracefix_rl.egg-info/
34
+ test_redirect.py
.gitignore CHANGED
@@ -12,4 +12,6 @@ package-lock.json
12
  .agent/
13
  **/__pycache__/
14
  **/.venv/
15
- **/node_modules/
 
 
 
12
  .agent/
13
  **/__pycache__/
14
  **/.venv/
15
+ **/node_modules/
16
+ openenv_tracefix_rl.egg-info/
17
+ test_redirect.py
.hfignore CHANGED
@@ -33,3 +33,6 @@ build/
33
  outputs/
34
  *.log
35
  .github/
 
 
 
 
33
  outputs/
34
  *.log
35
  .github/
36
+
37
+ openenv_tracefix_rl.egg-info/
38
+ test_redirect.py
Dockerfile CHANGED
@@ -52,4 +52,4 @@ WORKDIR /app/env
52
 
53
  USER appuser
54
 
55
- CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "7860"]
 
52
 
53
  USER appuser
54
 
55
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -54,7 +54,7 @@ Every task contains: `name`, `description`, `difficulty`, `bug_type`, `code` (bu
54
 
55
  This environment enforces strict typing and uses standard modern tooling:
56
  - **`uv`:** Handles dependency management (see `pyproject.toml`).
57
- - **FastAPI:** Provides the `backend.app` integration layer for OpenEnv compliance.
58
  - **Pydantic (v2):** Provides strong validation layers for `models.py` (e.g., `CodeAction`, `CodeObservation`).
59
  - **OpenEnv Config:** See `openenv.yaml` which specifies `tracefix_rl` to run the FastAPI app on port `7860`.
60
 
@@ -63,7 +63,7 @@ This environment enforces strict typing and uses standard modern tooling:
63
  - `tasks.py`: Task metadata definitions.
64
  - `sandbox.py`: Subprocess runtime and output tracking.
65
  - `environment.py`: Reset/step/reward core RL loop logic (`TraceFixRLGym`).
66
- - `backend/tracefix_rl_environment.py` / `backend/app.py`: Maps the OpenAI/OpenEnv network interface to the core environment.
67
  - `inference.py`: Baseline OpenAI-client inference script to evaluate agents.
68
 
69
  ## Local Development
 
54
 
55
  This environment enforces strict typing and uses standard modern tooling:
56
  - **`uv`:** Handles dependency management (see `pyproject.toml`).
57
+ - **FastAPI:** Provides the `server.app` integration layer for OpenEnv compliance.
58
  - **Pydantic (v2):** Provides strong validation layers for `models.py` (e.g., `CodeAction`, `CodeObservation`).
59
  - **OpenEnv Config:** See `openenv.yaml` which specifies `tracefix_rl` to run the FastAPI app on port `7860`.
60
 
 
63
  - `tasks.py`: Task metadata definitions.
64
  - `sandbox.py`: Subprocess runtime and output tracking.
65
  - `environment.py`: Reset/step/reward core RL loop logic (`TraceFixRLGym`).
66
+ - `server/tracefix_rl_environment.py` / `server/app.py`: Maps the OpenAI/OpenEnv network interface to the core environment.
67
  - `inference.py`: Baseline OpenAI-client inference script to evaluate agents.
68
 
69
  ## Local Development
backend/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- """TraceFix-RL backend components."""
2
-
3
- from .tracefix_rl_environment import TraceFixRLEnvironment
4
-
5
- __all__ = ["TraceFixRLEnvironment"]
 
 
 
 
 
 
backend/app.py DELETED
@@ -1,57 +0,0 @@
1
- """FastAPI entry point for TraceFix-RL."""
2
-
3
- import gradio as gr
4
- from vision_ui import demo
5
-
6
- try:
7
- from openenv.core.env_server.http_server import create_app
8
- except Exception as e: # pragma: no cover
9
- raise ImportError(
10
- "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
11
- ) from e
12
-
13
- try:
14
- from core.models import CodeAction, CodeObservation
15
- from backend.tracefix_rl_environment import TraceFixRLEnvironment
16
- except ImportError:
17
- import sys
18
- from pathlib import Path
19
-
20
- sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
21
- from core.models import CodeAction, CodeObservation
22
- from backend.tracefix_rl_environment import TraceFixRLEnvironment
23
-
24
-
25
- app = create_app(
26
- TraceFixRLEnvironment,
27
- CodeAction,
28
- CodeObservation,
29
- env_name="tracefix_rl",
30
- max_concurrent_envs=1,
31
- )
32
-
33
- from fastapi.responses import RedirectResponse
34
-
35
- @app.get("/", include_in_schema=False)
36
- async def root_redirect():
37
- return RedirectResponse(url="/web/")
38
-
39
- @app.get("/web", include_in_schema=False)
40
- async def web_no_slash_redirect():
41
- return RedirectResponse(url="/web/")
42
-
43
- app = gr.mount_gradio_app(app, demo, path="/web")
44
-
45
-
46
- def main() -> None:
47
- """Entry point for local and container execution."""
48
- import os
49
- import uvicorn
50
-
51
- host = os.environ.get("HOST", "0.0.0.0")
52
- port = int(os.environ.get("PORT", "7860"))
53
- uvicorn.run(app, host=host, port=port)
54
-
55
-
56
- if __name__ == "__main__":
57
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/tracefix_rl_environment.py DELETED
@@ -1,67 +0,0 @@
1
- """OpenEnv adapter around the TraceFix-RL core environment."""
2
-
3
- from openenv.core.env_server.interfaces import Environment
4
- from openenv.core.env_server.types import State
5
-
6
- try:
7
- from core.environment import TraceFixRLGym
8
- from core.models import CodeAction, CodeObservation
9
- except ImportError:
10
- from core.environment import TraceFixRLGym
11
- from core.models import CodeAction, CodeObservation
12
-
13
-
14
- class TraceFixRLEnvironment(Environment):
15
- """Environment implementation compatible with OpenEnv's server interface."""
16
-
17
- SUPPORTS_CONCURRENT_SESSIONS: bool = True
18
-
19
- def __init__(self):
20
- self._gym = TraceFixRLGym()
21
- self._state = State(episode_id="", step_count=0)
22
-
23
- def reset(self, difficulty: str | None = None, task_name: str | None = None) -> CodeObservation:
24
- if difficulty == "easy":
25
- self._gym.training_step = 1
26
- elif difficulty == "medium":
27
- self._gym.training_step = 2000
28
- elif difficulty == "hard":
29
- self._gym.training_step = 6000
30
-
31
- task_dict = None
32
- if task_name and task_name != "tracefix_rl":
33
- try:
34
- from tasks.tasks import ALL_TASKS
35
- for t in ALL_TASKS:
36
- if t.get("name") == task_name:
37
- task_dict = t
38
- break
39
- except ImportError:
40
- pass
41
-
42
- obs, system_prompt = self._gym.reset(task_index=task_dict)
43
- self._state = State(
44
- episode_id=obs.info.get("episode_id", ""),
45
- step_count=obs.step_count,
46
- )
47
- metadata = dict(obs.metadata or {})
48
- metadata["system_prompt"] = system_prompt
49
- obs.metadata = metadata
50
- return obs
51
-
52
- def step(self, action: CodeAction) -> CodeObservation: # type: ignore[override]
53
- obs, reward, done, info = self._gym.step(action)
54
- obs.reward = reward
55
- obs.done = done
56
- metadata = dict(obs.metadata or {})
57
- metadata.update(info)
58
- obs.metadata = metadata
59
- self._state = State(
60
- episode_id=obs.info.get("episode_id", ""),
61
- step_count=obs.step_count,
62
- )
63
- return obs
64
-
65
- @property
66
- def state(self) -> State:
67
- return self._state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
core/environment.py CHANGED
@@ -298,7 +298,7 @@ class TraceFixRLGym:
298
  total = len(results)
299
  passes = 0 if syntax_err else sum(1 for t in results if t.passed)
300
  raw = (passes / total if total > 0 else 0.0) - self._accumulated_step_costs
301
- reward = max(0.0, min(1.0, raw))
302
  self._last_output += (
303
  f"\n⚠ Max steps ({MAX_STEPS}) reached. "
304
  f"Auto-evaluated: {passes}/{total} tests passing. "
@@ -314,7 +314,7 @@ class TraceFixRLGym:
314
  "step": self._step_count,
315
  }
316
  if self._done:
317
- info["final_score"] = max(0.0, min(1.0, round(reward, 4)))
318
 
319
  return obs, round(reward, 4), self._done, info
320
 
@@ -467,7 +467,7 @@ class TraceFixRLGym:
467
 
468
  proportion = passes / total if total > 0 else 0.0
469
  raw_score = proportion - self._accumulated_step_costs
470
- final_score = max(0.0, min(1.0, raw_score))
471
 
472
  if not syntax_err:
473
  if passes == total:
 
298
  total = len(results)
299
  passes = 0 if syntax_err else sum(1 for t in results if t.passed)
300
  raw = (passes / total if total > 0 else 0.0) - self._accumulated_step_costs
301
+ reward = max(0.01, min(0.99, raw))
302
  self._last_output += (
303
  f"\n⚠ Max steps ({MAX_STEPS}) reached. "
304
  f"Auto-evaluated: {passes}/{total} tests passing. "
 
314
  "step": self._step_count,
315
  }
316
  if self._done:
317
+ info["final_score"] = max(0.01, min(0.99, round(reward, 4)))
318
 
319
  return obs, round(reward, 4), self._done, info
320
 
 
467
 
468
  proportion = passes / total if total > 0 else 0.0
469
  raw_score = proportion - self._accumulated_step_costs
470
+ final_score = max(0.01, min(0.99, raw_score))
471
 
472
  if not syntax_err:
473
  if passes == total:
inference.py CHANGED
@@ -46,7 +46,7 @@ ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:7860")
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
49
- SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
50
 
51
  SYSTEM_PROMPT = """\
52
  You are a deterministic debugging policy agent.
@@ -296,7 +296,7 @@ def _compute_score(step_result: Any, rewards: list[float]) -> float:
296
  raw = info.get("final_score")
297
  if raw is None:
298
  raw = sum(rewards)
299
- return max(0.0, min(1.0, float(raw)))
300
 
301
 
302
  async def run(difficulty: Optional[str] = None, show_thought: bool = False) -> None:
 
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
49
+ SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.98"))
50
 
51
  SYSTEM_PROMPT = """\
52
  You are a deterministic debugging policy agent.
 
296
  raw = info.get("final_score")
297
  if raw is None:
298
  raw = sum(rewards)
299
+ return max(0.01, min(0.99, float(raw)))
300
 
301
 
302
  async def run(difficulty: Optional[str] = None, show_thought: bool = False) -> None:
openenv.yaml CHANGED
@@ -2,5 +2,5 @@ spec_version: 1
2
  name: tracefix_rl
3
  type: space
4
  runtime: fastapi
5
- app: backend.app:app
6
  port: 7860
 
2
  name: tracefix_rl
3
  type: space
4
  runtime: fastapi
5
+ app: server.app:app
6
  port: 7860
pyproject.toml CHANGED
@@ -38,10 +38,10 @@ dev = [
38
 
39
  [project.scripts]
40
  # Server entry point - enables running via: uv run --project . server
41
- # or: python -m tracefix_rl.backend.app
42
- server = "tracefix_rl.backend.app:main"
43
 
44
  [tool.setuptools]
45
  include-package-data = true
46
- packages = ["tracefix_rl", "tracefix_rl.backend", "tracefix_rl.core", "tracefix_rl.tasks"]
47
- package-dir = { "tracefix_rl" = ".", "tracefix_rl.backend" = "backend", "tracefix_rl.core" = "core", "tracefix_rl.tasks" = "tasks" }
 
38
 
39
  [project.scripts]
40
  # Server entry point - enables running via: uv run --project . server
41
+ # or: python -m tracefix_rl.server.app
42
+ server = "tracefix_rl.server.app:main"
43
 
44
  [tool.setuptools]
45
  include-package-data = true
46
+ packages = ["tracefix_rl", "tracefix_rl.server", "tracefix_rl.core", "tracefix_rl.tasks"]
47
+ package-dir = { "tracefix_rl" = ".", "tracefix_rl.server" = "server", "tracefix_rl.core" = "core", "tracefix_rl.tasks" = "tasks" }
vision_ui.py CHANGED
@@ -512,7 +512,7 @@ with gr.Blocks(title="TraceFix-RL") as demo:
512
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
513
  success_score_threshold = gr.Number(
514
  label="Success Score Threshold",
515
- value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
516
  precision=2,
517
  )
518
  show_thought = gr.Checkbox(label="Stream Thought Trace", value=False)
 
512
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
513
  success_score_threshold = gr.Number(
514
  label="Success Score Threshold",
515
+ value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.98")),
516
  precision=2,
517
  )
518
  show_thought = gr.Checkbox(label="Stream Thought Trace", value=False)