Spaces:

SolusOps
/

tracefix_rl

Sleeping

App Files Community

databoysu committed on Apr 8

Commit

9a026d7

1 Parent(s): f341e79

Thought enabling

Browse files

Files changed (3) hide show

__pycache__/models.cpython-312.pyc +0 -0
inference.py +7 -21
models.py +34 -56

__pycache__/models.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/models.cpython-312.pyc and b/__pycache__/models.cpython-312.pyc differ

inference.py CHANGED Viewed

@@ -19,7 +19,6 @@ import argparse
 import asyncio
 import json
 import os
-import re
 import sys
 from pathlib import Path
 from typing import Any, Optional
@@ -46,7 +45,6 @@ TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
 BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
-THINKING_TOKEN_LIMIT = int(os.getenv("THINKING_TOKEN_LIMIT", "1000"))
 MAX_PARSE_RETRIES = 3
 SYSTEM_PROMPT = (
@@ -111,26 +109,15 @@ def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> No
 def _extract_json(text: str) -> dict[str, Any]:
     stripped = text.strip()
     try:
         return json.loads(stripped)
-    except json.JSONDecodeError:
-        pass
-    fence = re.search(r"```(?:json)?\s*({.*?})\s*```", stripped, re.DOTALL)
-    if fence:
-        try:
-            return json.loads(fence.group(1))
-        except json.JSONDecodeError:
-            pass
-    block = re.search(r"({.*?})", stripped, re.DOTALL)
-    if block:
-        try:
-            return json.loads(block.group(1))
-        except json.JSONDecodeError:
-            pass
-    raise ValueError("Invalid JSON response.")
 def _build_observation_text(observation: Any) -> str:
@@ -169,7 +156,6 @@ def _get_model_response(
             {"role": "user", "content": user_prompt},
         ],
         "temperature": 0.0,
-        "max_tokens": THINKING_TOKEN_LIMIT,
         "stream": False,
     }
     try:

 import asyncio
 import json
 import os
 import sys
 from pathlib import Path
 from typing import Any, Optional
 BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99"))
 MAX_PARSE_RETRIES = 3
 SYSTEM_PROMPT = (
 def _extract_json(text: str) -> dict[str, Any]:
     stripped = text.strip()
+    if stripped.startswith("```") and stripped.endswith("```"):
+        first_newline = stripped.find("\n")
+        if first_newline == -1:
+            raise ValueError("Invalid JSON response.")
+        stripped = stripped[first_newline + 1 : -3].strip()
     try:
         return json.loads(stripped)
+    except json.JSONDecodeError as exc:
+        raise ValueError("Invalid JSON response.") from exc
 def _build_observation_text(observation: Any) -> str:
             {"role": "user", "content": user_prompt},
         ],
         "temperature": 0.0,
         "stream": False,
     }
     try:

models.py CHANGED Viewed

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional
 from openenv.core.env_server.types import Action, Observation
-from pydantic import BaseModel, Field, model_validator
 ActionType = Literal[
@@ -21,67 +21,45 @@ ActionType = Literal[
 class CodeAction(Action):
     """Structured action consumed by the environment."""
     thought: str = Field(
         ...,
-        description="Mandatory reasoning string before selecting an action.",
     )
     action_type: ActionType = Field(
         ...,
-        description="One of VIEW_CODE, RUN_TESTS, REPLACE_LINES, UNDO_EDIT, RESET_TO_ORIGINAL, SUBMIT.",
     )
-    start_line: Optional[int] = Field(default=None)
-    end_line: Optional[int] = Field(default=None)
-    new_code_block: Optional[str] = Field(default=None)
-    @model_validator(mode="before")
-    @classmethod
-    def validate_and_normalize(cls, data: Any) -> Any:
-        if not isinstance(data, dict):
-            return data
-        action_type = data.get("action_type")
-        def _coerce_optional_int(value: Any) -> Optional[int]:
-            if value is None:
-                return None
-            if isinstance(value, int):
-                return value
-            if isinstance(value, str):
-                raw = value.strip()
-                if raw == "":
-                    return None
-                try:
-                    return int(raw)
-                except ValueError:
-                    return None
-            return None
-        data = dict(data)
-        data["start_line"] = _coerce_optional_int(data.get("start_line"))
-        data["end_line"] = _coerce_optional_int(data.get("end_line"))
-        if action_type == "REPLACE_LINES":
-            start_line = data.get("start_line")
-            end_line = data.get("end_line")
-            new_code_block = data.get("new_code_block")
-            if start_line is None:
-                raise ValueError("REPLACE_LINES requires start_line.")
-            if end_line is None:
-                raise ValueError("REPLACE_LINES requires end_line.")
-            if new_code_block is None:
-                raise ValueError("REPLACE_LINES requires new_code_block.")
-            if start_line < 1 or end_line < 1:
-                raise ValueError("REPLACE_LINES requires start_line and end_line >= 1.")
-            if start_line > end_line:
-                raise ValueError("REPLACE_LINES requires start_line <= end_line.")
-        else:
-            # Web UI often sends default line fields for non-edit actions.
-            data["start_line"] = None
-            data["end_line"] = None
-            data["new_code_block"] = None
-        return data
 class TestResult(BaseModel):

 from typing import Any, Dict, List, Literal, Optional
 from openenv.core.env_server.types import Action, Observation
+from pydantic import BaseModel, ConfigDict, Field
 ActionType = Literal[
 class CodeAction(Action):
     """Structured action consumed by the environment."""
+    model_config = ConfigDict(strict=True)
     thought: str = Field(
         ...,
+        description=(
+            "MANDATORY. Analyze the localized_context and last_execution_output. "
+            "If tests failed, identify the error line and root cause. Explicitly plan "
+            "your next action before executing it."
+        ),
     )
     action_type: ActionType = Field(
         ...,
+        description=(
+            "The specific tool to use. VIEW_CODE to read. RUN_TESTS to execute and get "
+            "tracebacks. REPLACE_LINES to apply a fix. UNDO_EDIT to revert your last "
+            "change if it failed. SUBMIT only when all tests pass."
+        ),
+    )
+    start_line: Optional[int] = Field(
+        default=None,
+        description=(
+            "The inclusive start line number for REPLACE_LINES. You MUST use the exact "
+            "integer keys provided in the code_dict observation."
+        ),
+    )
+    end_line: Optional[int] = Field(
+        default=None,
+        description=(
+            "The inclusive end line number for REPLACE_LINES. You MUST use the exact "
+            "integer keys provided in the code_dict observation."
+        ),
+    )
+    new_code_block: Optional[str] = Field(
+        default=None,
+        description=(
+            "The exact replacement Python code. Must be properly indented to match the "
+            "surrounding code. Do not include markdown formatting or backticks."
+        ),
     )
 class TestResult(BaseModel):