Spaces:

SolusOps
/

tracefix_rl

Sleeping

App Files Files Community

databoysu committed on Apr 8

Commit

20ef9ad

1 Parent(s): 2e11c6a

TraceFix-RL v1 deploy

Browse files

Files changed (9) hide show

README.md +22 -0
__pycache__/client.cpython-312.pyc +0 -0
__pycache__/context.cpython-312.pyc +0 -0
__pycache__/environment.cpython-312.pyc +0 -0
__pycache__/models.cpython-312.pyc +0 -0
__pycache__/sandbox.cpython-312.pyc +0 -0
inference.py +48 -11
server/__pycache__/__init__.cpython-312.pyc +0 -0
server/tracefix_rl_environment.py +8 -1

README.md CHANGED Viewed

@@ -58,6 +58,7 @@ Tasks are organized in `tasks.py` into three tiers.
   Focus: data-structure invariants, eviction/promotion logic, bracket mapping, and interval merging edge behavior.
 Every task follows the same schema:
 - `name`, `description`, `difficulty`, `bug_type`
 - `code`: buggy implementation (line list)
 - `solution`: reference implementation
@@ -83,6 +84,7 @@ uv run --project . server
 ```
 Server endpoints:
 - `POST /reset`
 - `POST /step`
 - `GET /health`
@@ -100,6 +102,26 @@ Server endpoints:
 `[START]`, one or more `[STEP]`, then `[END]`.
 - Final score is clamped to `[0, 1]`.
 ## Docker + Deployment
 Build locally:

   Focus: data-structure invariants, eviction/promotion logic, bracket mapping, and interval merging edge behavior.
 Every task follows the same schema:
 - `name`, `description`, `difficulty`, `bug_type`
 - `code`: buggy implementation (line list)
 - `solution`: reference implementation
 ```
 Server endpoints:
 - `POST /reset`
 - `POST /step`
 - `GET /health`
 `[START]`, one or more `[STEP]`, then `[END]`.
 - Final score is clamped to `[0, 1]`.
+## Inference Flags
+`inference.py` supports:
+- `--easy`: run episode using easy-tier curriculum sampling.
+- `--medium`: run episode using medium-tier curriculum sampling.
+- `--hard`: run episode using hard-tier curriculum sampling.
+- `--debug`: print raw model response snippets for troubleshooting.
+Example:
+```bash
+python inference.py --medium --debug
+```
+The script also enforces a model-thinking/output cap:
+- `THINKING_TOKEN_LIMIT` (default `512`) is used as `max_tokens` in model calls.
+- `thought` content is hard-truncated to `THINKING_TOKEN_LIMIT * 4` characters (a rough characters-per-token estimate) before action validation to prevent oversized payloads.
 ## Docker + Deployment
 Build locally:

__pycache__/client.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/client.cpython-312.pyc and b/__pycache__/client.cpython-312.pyc differ

__pycache__/context.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/context.cpython-312.pyc and b/__pycache__/context.cpython-312.pyc differ

__pycache__/environment.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/environment.cpython-312.pyc and b/__pycache__/environment.cpython-312.pyc differ

__pycache__/models.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/models.cpython-312.pyc and b/__pycache__/models.cpython-312.pyc differ

__pycache__/sandbox.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/sandbox.cpython-312.pyc and b/__pycache__/sandbox.cpython-312.pyc differ

inference.py CHANGED Viewed

@@ -15,17 +15,23 @@ This script prints exactly:
 from __future__ import annotations
 import asyncio
 import json
 import os
 import re
-from typing import Any
 from openai import OpenAI
 try:
     from tracefix_rl import CodeAction, TraceFixRLEnv
-except ImportError:
     from client import TraceFixRLEnv
     from models import CodeAction
@@ -40,6 +46,9 @@ TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
 BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
 SYSTEM_PROMPT = (
     "You are controlling a Python debugging RL environment. "
@@ -54,7 +63,7 @@ def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
-def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
     error_value = error if error else "null"
     print(
         f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_value}",
@@ -106,7 +115,9 @@ def _build_observation_text(observation: Any) -> str:
     )
-def _get_model_action(client: OpenAI, observation: Any, history: list[str]) -> dict[str, Any]:
     obs_text = _build_observation_text(observation)
     user_prompt = (
         "Pick the single best next action and return only JSON.\n\n"
@@ -121,10 +132,12 @@ def _get_model_action(client: OpenAI, observation: Any, history: list[str]) -> d
                 {"role": "user", "content": user_prompt},
             ],
             temperature=0.0,
-            max_tokens=300,
             stream=False,
         )
         response_text = (completion.choices[0].message.content or "").strip()
         action = _extract_json(response_text)
     except Exception:
         action = {"action_type": "RUN_TESTS"}
@@ -143,9 +156,13 @@ def _get_model_action(client: OpenAI, observation: Any, history: list[str]) -> d
 def _to_code_action(action_dict: dict[str, Any]) -> CodeAction:
     payload = {
         "action_type": action_dict.get("action_type", "RUN_TESTS"),
-        "thought": action_dict.get("thought"),
         "start_line": action_dict.get("start_line"),
         "end_line": action_dict.get("end_line"),
         "new_code_block": action_dict.get("new_code_block"),
@@ -167,10 +184,10 @@ def _compute_score(step_result: Any, rewards: list[float]) -> float:
     return max(0.0, min(1.0, float(raw)))
-async def main() -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
-    env: TraceFixRLEnv | None = None
     rewards: list[float] = []
     history: list[str] = []
     steps_taken = 0
@@ -184,7 +201,11 @@ async def main() -> None:
         else:
             env = TraceFixRLEnv(base_url=ENV_BASE_URL)
-        result = await env.reset()
         task_name = result.observation.info.get("task_name") or TASK_NAME
         log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
         started = True
@@ -193,7 +214,7 @@ async def main() -> None:
             if result.done:
                 break
-            action_dict = _get_model_action(client, result.observation, history)
             action = _to_code_action(action_dict)
             result = await env.step(action)
@@ -238,4 +259,20 @@ async def main() -> None:
 if __name__ == "__main__":
-    asyncio.run(main())

 from __future__ import annotations
+import argparse
 import asyncio
 import json
 import os
 import re
+import sys
+from pathlib import Path
+from typing import Any, Optional
 from openai import OpenAI
 try:
     from tracefix_rl import CodeAction, TraceFixRLEnv
+except Exception:
+    ROOT_DIR = Path(__file__).resolve().parent
+    if str(ROOT_DIR) not in sys.path:
+        sys.path.insert(0, str(ROOT_DIR))
     from client import TraceFixRLEnv
     from models import CodeAction
 BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
+THINKING_TOKEN_LIMIT = int(os.getenv("THINKING_TOKEN_LIMIT", "512"))
+# Character cap for `thought`, approximating one token as 4 characters; used for hard truncation before sending to the server.
+THINKING_CHAR_LIMIT = THINKING_TOKEN_LIMIT * 4
 SYSTEM_PROMPT = (
     "You are controlling a Python debugging RL environment. "
     print(f"[START] task={task} env={env} model={model}", flush=True)
+def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
     error_value = error if error else "null"
     print(
         f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_value}",
     )
+def _get_model_action(
+    client: OpenAI, observation: Any, history: list[str], debug: bool = False
+) -> dict[str, Any]:
     obs_text = _build_observation_text(observation)
     user_prompt = (
         "Pick the single best next action and return only JSON.\n\n"
                 {"role": "user", "content": user_prompt},
             ],
             temperature=0.0,
+            max_tokens=THINKING_TOKEN_LIMIT,
             stream=False,
         )
         response_text = (completion.choices[0].message.content or "").strip()
+        if debug:
+            print(f"[DEBUG] raw_model_response={response_text[:500]}", flush=True)
         action = _extract_json(response_text)
     except Exception:
         action = {"action_type": "RUN_TESTS"}
 def _to_code_action(action_dict: dict[str, Any]) -> CodeAction:
+    thought = action_dict.get("thought")
+    if isinstance(thought, str):
+        thought = thought[:THINKING_CHAR_LIMIT]
     payload = {
         "action_type": action_dict.get("action_type", "RUN_TESTS"),
+        "thought": thought,
         "start_line": action_dict.get("start_line"),
         "end_line": action_dict.get("end_line"),
         "new_code_block": action_dict.get("new_code_block"),
     return max(0.0, min(1.0, float(raw)))
+async def run(difficulty: Optional[str] = None, debug: bool = False) -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
+    env: Optional[TraceFixRLEnv] = None
     rewards: list[float] = []
     history: list[str] = []
     steps_taken = 0
         else:
             env = TraceFixRLEnv(base_url=ENV_BASE_URL)
+        if difficulty:
+            reset_kwargs = {"difficulty": difficulty}
+            result = await env.reset(**reset_kwargs)
+        else:
+            result = await env.reset()
         task_name = result.observation.info.get("task_name") or TASK_NAME
         log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
         started = True
             if result.done:
                 break
+            action_dict = _get_model_action(client, result.observation, history, debug=debug)
             action = _to_code_action(action_dict)
             result = await env.step(action)
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run TraceFix-RL inference baseline.")
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument("--easy", action="store_true", help="Run on easy curriculum tier.")
+    group.add_argument("--medium", action="store_true", help="Run on medium curriculum tier.")
+    group.add_argument("--hard", action="store_true", help="Run on hard curriculum tier.")
+    parser.add_argument("--debug", action="store_true", help="Print debug model output snippets.")
+    args = parser.parse_args()
+    difficulty: Optional[str] = None
+    if args.easy:
+        difficulty = "easy"
+    elif args.medium:
+        difficulty = "medium"
+    elif args.hard:
+        difficulty = "hard"
+    asyncio.run(run(difficulty=difficulty, debug=args.debug))

server/__pycache__/__init__.cpython-312.pyc CHANGED Viewed

Binary files a/server/__pycache__/__init__.cpython-312.pyc and b/server/__pycache__/__init__.cpython-312.pyc differ

server/tracefix_rl_environment.py CHANGED Viewed

@@ -20,7 +20,14 @@ class TraceFixRLEnvironment(Environment):
         self._gym = TraceFixRLGym()
         self._state = State(episode_id="", step_count=0)
-    def reset(self) -> CodeObservation:
         obs, system_prompt = self._gym.reset()
         self._state = State(
             episode_id=obs.info.get("episode_id", ""),

         self._gym = TraceFixRLGym()
         self._state = State(episode_id="", step_count=0)
+    def reset(self, difficulty: str | None = None) -> CodeObservation:
+        if difficulty == "easy":
+            self._gym.training_step = 1
+        elif difficulty == "medium":
+            self._gym.training_step = 2000
+        elif difficulty == "hard":
+            self._gym.training_step = 6000
         obs, system_prompt = self._gym.reset()
         self._state = State(
             episode_id=obs.info.get("episode_id", ""),