Spaces:
Sleeping
Sleeping
databoysu committed on
Commit ·
20ef9ad
1
Parent(s): 2e11c6a
TraceFix-RL v1 deploy
Browse files
- README.md +22 -0
- __pycache__/client.cpython-312.pyc +0 -0
- __pycache__/context.cpython-312.pyc +0 -0
- __pycache__/environment.cpython-312.pyc +0 -0
- __pycache__/models.cpython-312.pyc +0 -0
- __pycache__/sandbox.cpython-312.pyc +0 -0
- inference.py +48 -11
- server/__pycache__/__init__.cpython-312.pyc +0 -0
- server/tracefix_rl_environment.py +8 -1
README.md
CHANGED
|
@@ -58,6 +58,7 @@ Tasks are organized in `tasks.py` into three tiers.
|
|
| 58 |
Focus: data-structure invariants, eviction/promotion logic, bracket mapping, and interval merging edge behavior.
|
| 59 |
|
| 60 |
Every task follows the same schema:
|
|
|
|
| 61 |
- `name`, `description`, `difficulty`, `bug_type`
|
| 62 |
- `code`: buggy implementation (line list)
|
| 63 |
- `solution`: reference implementation
|
|
@@ -83,6 +84,7 @@ uv run --project . server
|
|
| 83 |
```
|
| 84 |
|
| 85 |
Server endpoints:
|
|
|
|
| 86 |
- `POST /reset`
|
| 87 |
- `POST /step`
|
| 88 |
- `GET /health`
|
|
@@ -100,6 +102,26 @@ Server endpoints:
|
|
| 100 |
`[START]`, one or more `[STEP]`, then `[END]`.
|
| 101 |
- Final score is clamped to `[0, 1]`.
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
## Docker + Deployment
|
| 104 |
|
| 105 |
Build locally:
|
|
|
|
| 58 |
Focus: data-structure invariants, eviction/promotion logic, bracket mapping, and interval merging edge behavior.
|
| 59 |
|
| 60 |
Every task follows the same schema:
|
| 61 |
+
|
| 62 |
- `name`, `description`, `difficulty`, `bug_type`
|
| 63 |
- `code`: buggy implementation (line list)
|
| 64 |
- `solution`: reference implementation
|
|
|
|
| 84 |
```
|
| 85 |
|
| 86 |
Server endpoints:
|
| 87 |
+
|
| 88 |
- `POST /reset`
|
| 89 |
- `POST /step`
|
| 90 |
- `GET /health`
|
|
|
|
| 102 |
`[START]`, one or more `[STEP]`, then `[END]`.
|
| 103 |
- Final score is clamped to `[0, 1]`.
|
| 104 |
|
| 105 |
+
## Inference Flags
|
| 106 |
+
|
| 107 |
+
`inference.py` supports:
|
| 108 |
+
|
| 109 |
+
- `--easy`: run episode using easy-tier curriculum sampling.
|
| 110 |
+
- `--medium`: run episode using medium-tier curriculum sampling.
|
| 111 |
+
- `--hard`: run episode using hard-tier curriculum sampling.
|
| 112 |
+
- `--debug`: print raw model response snippets for troubleshooting.
|
| 113 |
+
|
| 114 |
+
Example:
|
| 115 |
+
|
| 116 |
+
```bash
|
| 117 |
+
python inference.py --medium --debug
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
The script also enforces a model-thinking/output cap:
|
| 121 |
+
|
| 122 |
+
- `THINKING_TOKEN_LIMIT` (default `512`) is used as `max_tokens` in model calls.
|
| 123 |
+
- `thought` content is hard-truncated before action validation to prevent oversized payloads.
|
| 124 |
+
|
| 125 |
## Docker + Deployment
|
| 126 |
|
| 127 |
Build locally:
|
__pycache__/client.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/client.cpython-312.pyc and b/__pycache__/client.cpython-312.pyc differ
|
|
|
__pycache__/context.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/context.cpython-312.pyc and b/__pycache__/context.cpython-312.pyc differ
|
|
|
__pycache__/environment.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/environment.cpython-312.pyc and b/__pycache__/environment.cpython-312.pyc differ
|
|
|
__pycache__/models.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/models.cpython-312.pyc and b/__pycache__/models.cpython-312.pyc differ
|
|
|
__pycache__/sandbox.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/sandbox.cpython-312.pyc and b/__pycache__/sandbox.cpython-312.pyc differ
|
|
|
inference.py
CHANGED
|
@@ -15,17 +15,23 @@ This script prints exactly:
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
|
|
|
| 18 |
import asyncio
|
| 19 |
import json
|
| 20 |
import os
|
| 21 |
import re
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
| 24 |
from openai import OpenAI
|
| 25 |
|
| 26 |
try:
|
| 27 |
from tracefix_rl import CodeAction, TraceFixRLEnv
|
| 28 |
-
except
|
|
|
|
|
|
|
|
|
|
| 29 |
from client import TraceFixRLEnv
|
| 30 |
from models import CodeAction
|
| 31 |
|
|
@@ -40,6 +46,9 @@ TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
|
|
| 40 |
BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
|
| 41 |
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
|
| 42 |
SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
SYSTEM_PROMPT = (
|
| 45 |
"You are controlling a Python debugging RL environment. "
|
|
@@ -54,7 +63,7 @@ def log_start(task: str, env: str, model: str) -> None:
|
|
| 54 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 55 |
|
| 56 |
|
| 57 |
-
def log_step(step: int, action: str, reward: float, done: bool, error: str
|
| 58 |
error_value = error if error else "null"
|
| 59 |
print(
|
| 60 |
f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_value}",
|
|
@@ -106,7 +115,9 @@ def _build_observation_text(observation: Any) -> str:
|
|
| 106 |
)
|
| 107 |
|
| 108 |
|
| 109 |
-
def _get_model_action(
|
|
|
|
|
|
|
| 110 |
obs_text = _build_observation_text(observation)
|
| 111 |
user_prompt = (
|
| 112 |
"Pick the single best next action and return only JSON.\n\n"
|
|
@@ -121,10 +132,12 @@ def _get_model_action(client: OpenAI, observation: Any, history: list[str]) -> d
|
|
| 121 |
{"role": "user", "content": user_prompt},
|
| 122 |
],
|
| 123 |
temperature=0.0,
|
| 124 |
-
max_tokens=
|
| 125 |
stream=False,
|
| 126 |
)
|
| 127 |
response_text = (completion.choices[0].message.content or "").strip()
|
|
|
|
|
|
|
| 128 |
action = _extract_json(response_text)
|
| 129 |
except Exception:
|
| 130 |
action = {"action_type": "RUN_TESTS"}
|
|
@@ -143,9 +156,13 @@ def _get_model_action(client: OpenAI, observation: Any, history: list[str]) -> d
|
|
| 143 |
|
| 144 |
|
| 145 |
def _to_code_action(action_dict: dict[str, Any]) -> CodeAction:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
payload = {
|
| 147 |
"action_type": action_dict.get("action_type", "RUN_TESTS"),
|
| 148 |
-
"thought":
|
| 149 |
"start_line": action_dict.get("start_line"),
|
| 150 |
"end_line": action_dict.get("end_line"),
|
| 151 |
"new_code_block": action_dict.get("new_code_block"),
|
|
@@ -167,10 +184,10 @@ def _compute_score(step_result: Any, rewards: list[float]) -> float:
|
|
| 167 |
return max(0.0, min(1.0, float(raw)))
|
| 168 |
|
| 169 |
|
| 170 |
-
async def
|
| 171 |
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
|
| 172 |
|
| 173 |
-
env: TraceFixRLEnv
|
| 174 |
rewards: list[float] = []
|
| 175 |
history: list[str] = []
|
| 176 |
steps_taken = 0
|
|
@@ -184,7 +201,11 @@ async def main() -> None:
|
|
| 184 |
else:
|
| 185 |
env = TraceFixRLEnv(base_url=ENV_BASE_URL)
|
| 186 |
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
task_name = result.observation.info.get("task_name") or TASK_NAME
|
| 189 |
log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
|
| 190 |
started = True
|
|
@@ -193,7 +214,7 @@ async def main() -> None:
|
|
| 193 |
if result.done:
|
| 194 |
break
|
| 195 |
|
| 196 |
-
action_dict = _get_model_action(client, result.observation, history)
|
| 197 |
action = _to_code_action(action_dict)
|
| 198 |
result = await env.step(action)
|
| 199 |
|
|
@@ -238,4 +259,20 @@ async def main() -> None:
|
|
| 238 |
|
| 239 |
|
| 240 |
if __name__ == "__main__":
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
| 18 |
+
import argparse
|
| 19 |
import asyncio
|
| 20 |
import json
|
| 21 |
import os
|
| 22 |
import re
|
| 23 |
+
import sys
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
from typing import Any, Optional
|
| 26 |
|
| 27 |
from openai import OpenAI
|
| 28 |
|
| 29 |
try:
|
| 30 |
from tracefix_rl import CodeAction, TraceFixRLEnv
|
| 31 |
+
except Exception:
|
| 32 |
+
ROOT_DIR = Path(__file__).resolve().parent
|
| 33 |
+
if str(ROOT_DIR) not in sys.path:
|
| 34 |
+
sys.path.insert(0, str(ROOT_DIR))
|
| 35 |
from client import TraceFixRLEnv
|
| 36 |
from models import CodeAction
|
| 37 |
|
|
|
|
| 46 |
BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
|
| 47 |
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
|
| 48 |
SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
|
| 49 |
+
THINKING_TOKEN_LIMIT = int(os.getenv("THINKING_TOKEN_LIMIT", "512"))
|
| 50 |
+
# Approximation used for hard truncation before sending to server.
|
| 51 |
+
THINKING_CHAR_LIMIT = THINKING_TOKEN_LIMIT * 4
|
| 52 |
|
| 53 |
SYSTEM_PROMPT = (
|
| 54 |
"You are controlling a Python debugging RL environment. "
|
|
|
|
| 63 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 64 |
|
| 65 |
|
| 66 |
+
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
|
| 67 |
error_value = error if error else "null"
|
| 68 |
print(
|
| 69 |
f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_value}",
|
|
|
|
| 115 |
)
|
| 116 |
|
| 117 |
|
| 118 |
+
def _get_model_action(
|
| 119 |
+
client: OpenAI, observation: Any, history: list[str], debug: bool = False
|
| 120 |
+
) -> dict[str, Any]:
|
| 121 |
obs_text = _build_observation_text(observation)
|
| 122 |
user_prompt = (
|
| 123 |
"Pick the single best next action and return only JSON.\n\n"
|
|
|
|
| 132 |
{"role": "user", "content": user_prompt},
|
| 133 |
],
|
| 134 |
temperature=0.0,
|
| 135 |
+
max_tokens=THINKING_TOKEN_LIMIT,
|
| 136 |
stream=False,
|
| 137 |
)
|
| 138 |
response_text = (completion.choices[0].message.content or "").strip()
|
| 139 |
+
if debug:
|
| 140 |
+
print(f"[DEBUG] raw_model_response={response_text[:500]}", flush=True)
|
| 141 |
action = _extract_json(response_text)
|
| 142 |
except Exception:
|
| 143 |
action = {"action_type": "RUN_TESTS"}
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
def _to_code_action(action_dict: dict[str, Any]) -> CodeAction:
|
| 159 |
+
thought = action_dict.get("thought")
|
| 160 |
+
if isinstance(thought, str):
|
| 161 |
+
thought = thought[:THINKING_CHAR_LIMIT]
|
| 162 |
+
|
| 163 |
payload = {
|
| 164 |
"action_type": action_dict.get("action_type", "RUN_TESTS"),
|
| 165 |
+
"thought": thought,
|
| 166 |
"start_line": action_dict.get("start_line"),
|
| 167 |
"end_line": action_dict.get("end_line"),
|
| 168 |
"new_code_block": action_dict.get("new_code_block"),
|
|
|
|
| 184 |
return max(0.0, min(1.0, float(raw)))
|
| 185 |
|
| 186 |
|
| 187 |
+
async def run(difficulty: Optional[str] = None, debug: bool = False) -> None:
|
| 188 |
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
|
| 189 |
|
| 190 |
+
env: Optional[TraceFixRLEnv] = None
|
| 191 |
rewards: list[float] = []
|
| 192 |
history: list[str] = []
|
| 193 |
steps_taken = 0
|
|
|
|
| 201 |
else:
|
| 202 |
env = TraceFixRLEnv(base_url=ENV_BASE_URL)
|
| 203 |
|
| 204 |
+
if difficulty:
|
| 205 |
+
reset_kwargs = {"difficulty": difficulty}
|
| 206 |
+
result = await env.reset(**reset_kwargs)
|
| 207 |
+
else:
|
| 208 |
+
result = await env.reset()
|
| 209 |
task_name = result.observation.info.get("task_name") or TASK_NAME
|
| 210 |
log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
|
| 211 |
started = True
|
|
|
|
| 214 |
if result.done:
|
| 215 |
break
|
| 216 |
|
| 217 |
+
action_dict = _get_model_action(client, result.observation, history, debug=debug)
|
| 218 |
action = _to_code_action(action_dict)
|
| 219 |
result = await env.step(action)
|
| 220 |
|
|
|
|
| 259 |
|
| 260 |
|
| 261 |
if __name__ == "__main__":
|
| 262 |
+
parser = argparse.ArgumentParser(description="Run TraceFix-RL inference baseline.")
|
| 263 |
+
group = parser.add_mutually_exclusive_group()
|
| 264 |
+
group.add_argument("--easy", action="store_true", help="Run on easy curriculum tier.")
|
| 265 |
+
group.add_argument("--medium", action="store_true", help="Run on medium curriculum tier.")
|
| 266 |
+
group.add_argument("--hard", action="store_true", help="Run on hard curriculum tier.")
|
| 267 |
+
parser.add_argument("--debug", action="store_true", help="Print debug model output snippets.")
|
| 268 |
+
args = parser.parse_args()
|
| 269 |
+
|
| 270 |
+
difficulty: Optional[str] = None
|
| 271 |
+
if args.easy:
|
| 272 |
+
difficulty = "easy"
|
| 273 |
+
elif args.medium:
|
| 274 |
+
difficulty = "medium"
|
| 275 |
+
elif args.hard:
|
| 276 |
+
difficulty = "hard"
|
| 277 |
+
|
| 278 |
+
asyncio.run(run(difficulty=difficulty, debug=args.debug))
|
server/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/server/__pycache__/__init__.cpython-312.pyc and b/server/__pycache__/__init__.cpython-312.pyc differ
|
|
|
server/tracefix_rl_environment.py
CHANGED
|
@@ -20,7 +20,14 @@ class TraceFixRLEnvironment(Environment):
|
|
| 20 |
self._gym = TraceFixRLGym()
|
| 21 |
self._state = State(episode_id="", step_count=0)
|
| 22 |
|
| 23 |
-
def reset(self) -> CodeObservation:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
obs, system_prompt = self._gym.reset()
|
| 25 |
self._state = State(
|
| 26 |
episode_id=obs.info.get("episode_id", ""),
|
|
|
|
| 20 |
self._gym = TraceFixRLGym()
|
| 21 |
self._state = State(episode_id="", step_count=0)
|
| 22 |
|
| 23 |
+
def reset(self, difficulty: str | None = None) -> CodeObservation:
|
| 24 |
+
if difficulty == "easy":
|
| 25 |
+
self._gym.training_step = 1
|
| 26 |
+
elif difficulty == "medium":
|
| 27 |
+
self._gym.training_step = 2000
|
| 28 |
+
elif difficulty == "hard":
|
| 29 |
+
self._gym.training_step = 6000
|
| 30 |
+
|
| 31 |
obs, system_prompt = self._gym.reset()
|
| 32 |
self._state = State(
|
| 33 |
episode_id=obs.info.get("episode_id", ""),
|