Spaces:

PRANAV05092003
/

autonomous-code-refactoring-env

Sleeping

App Files Community

PRANAV05092003 committed on Apr 8

Commit

7f6de27

1 Parent(s): 9c6cff5

Final fix: strict stdout + safe execution

Browse files

Files changed (1) hide show

inference.py +160 -146

inference.py CHANGED Viewed

@@ -29,7 +29,7 @@ from openai import OpenAI
 API_BASE_URL = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
 MODEL_NAME = os.getenv("MODEL_NAME") or "gpt-4o-mini"
 HF_TOKEN = os.getenv("HF_TOKEN")
-ENV_URL: str | None = os.getenv("ENV_URL")
 LOCAL_IMAGE_NAME: str | None = os.getenv("LOCAL_IMAGE_NAME")
 TASKS: List[str] = ["rename_variables", "remove_dead_code", "full_refactor"]
@@ -55,23 +55,41 @@ Actions:
 Respond ONLY with valid JSON (no markdown):
 {"action": <0-4>, "reason": "<one sentence>"}"""
 def _env_url() -> str:
-    if ENV_URL:
-        return ENV_URL.rstrip("/")
-    raise RuntimeError("ENV_URL must be set before running inference.py")
 def _post(path: str, payload: dict | None = None) -> dict:
-    response = requests.post(f"{_env_url()}{path}", json=payload or {}, timeout=30)
-    response.raise_for_status()
-    return response.json()
 def _get(path: str) -> dict:
-    response = requests.get(f"{_env_url()}{path}", timeout=30)
-    response.raise_for_status()
-    return response.json()
 def reset_env(task_id: str) -> dict:
@@ -87,13 +105,17 @@ def get_state() -> dict:
 def grade(task_id: str, code: str) -> float:
-    response = requests.post(
-        f"{_env_url()}/tasks/{task_id}/grade",
-        json={"code": code},
-        timeout=30,
-    )
-    response.raise_for_status()
-    return float(response.json().get("score", 0.0))
 def choose_action(client: Optional[OpenAI], state: dict, task_id: str) -> Tuple[int, str]:
@@ -226,150 +248,142 @@ def run_all_tasks() -> Dict[str, float]:
     This is used by the FastAPI server to show live demo results on the Space.
     """
-    # Prefer local in-process execution when running inside the server (no ENV_URL needed).
     try:
-        from acre.tasks.task_registry import TaskRegistry
-        from openenv_interface import OpenEnvRefactorEnv
-    except Exception:
-        TaskRegistry = None  # type: ignore[assignment]
-        OpenEnvRefactorEnv = None  # type: ignore[assignment]
-    registry = TaskRegistry() if TaskRegistry is not None else None
-    env = OpenEnvRefactorEnv(registry=registry) if OpenEnvRefactorEnv is not None else None
-    def _choose_action_name(code: str, task_id: str) -> int:
-        # Reuse the same heuristic logic (deterministic).
-        has_generic = re.search(r"\b(x|tmp|i)\b", code) is not None
-        has_if_false = re.search(r"\bif\s+False\b", code) is not None
-        has_if_true = re.search(r"\bif\s+True\b", code) is not None
-        has_append_loop = ".append(" in code and "for " in code
-        has_double_not = "not not" in code
-        has_add_call = "add(" in code
-        if task_id == "rename_variables":
             if has_generic:
                 return 0
-            if has_if_false or "unused" in code:
-                return 1
-            if has_append_loop:
-                return 2
-            if has_if_true or has_double_not:
-                return 3
-            return 4
-        if task_id == "remove_dead_code":
-            if has_if_false or "unused" in code:
-                return 1
             if has_append_loop:
                 return 2
-            if has_if_true or has_double_not:
                 return 3
-            if has_generic:
-                return 0
-            return 4
-        if has_generic:
-            return 0
-        if has_append_loop:
-            return 2
-        if has_if_false or has_if_true or has_double_not:
-            return 3
-        if has_add_call:
-            return 4
-        return 1
-    # Map tasks → nice names for demo output.
-    task_plan = [
-        ("easy_task", "rename_variables"),
-        ("medium_task", "remove_dead_code"),
-        ("hard_task", "full_refactor"),
-    ]
-    results: Dict[str, float] = {"easy": 0.0, "medium": 0.0, "hard": 0.0, "final": 0.0}
-    scores: List[float] = []
-    # If we have a local env, use it. Otherwise fall back to HTTP (requires ENV_URL).
-    if env is None or registry is None:
-        if not ENV_URL:
             return results
-        # Use existing HTTP-driven path.
-        client: Optional[OpenAI] = None
-        for label, task_id in task_plan:
-            print(f"START {label}", flush=True)
-            reset_env(task_id)
-            for _ in range(5):
-                state = get_state()
-                action = _choose_action_name(str(state.get("current_code", "")), task_id)
-                action_name = ACTION_MEANINGS.get(int(action), "unknown")
-                print(f"STEP {action_name}", flush=True)
-                step_env(action)
-            final_state = get_state()
-            score = float(grade(task_id, final_state.get("current_code", "")))
-            print(f"END score: {score:.2f}", flush=True)
-            scores.append(score)
-            if task_id == "rename_variables":
-                results["easy"] = score
-            elif task_id == "remove_dead_code":
-                results["medium"] = score
-            else:
-                results["hard"] = score
-        results["final"] = float(sum(scores) / len(scores)) if scores else 0.0
-        return results
-    # Local in-process execution (fast + no network recursion).
-    for label, task_id in task_plan:
-        print(f"START {label}", flush=True)
-        env.reset(seed=0, task_id=task_id)
-        for _ in range(5):
-            st = env.state()
-            code = str(st.current_code)
-            action = int(_choose_action_name(code, task_id))
-            action_name = env.action_meanings.get(action, "unknown")
-            print(f"STEP {action_name}", flush=True)
-            env.step(action)
-        st = env.state()
-        task = registry.get_task(task_id)
-        score = float(task.grade_against_expected(st.current_code)) if task is not None else 0.0
-        print(f"END score: {score:.2f}", flush=True)
-        scores.append(score)
-        if task_id == "rename_variables":
-            results["easy"] = score
-        elif task_id == "remove_dead_code":
-            results["medium"] = score
         else:
-            results["hard"] = score
-    results["final"] = float(sum(scores) / len(scores)) if scores else 0.0
-    return results
 def main() -> None:
-    if not ENV_URL:
-        raise SystemExit("ENV_URL is required. Example: ENV_URL=http://localhost:7860")
-    # Required: OpenAI client is constructed via official SDK.
-    client: Optional[OpenAI] = None
-    if HF_TOKEN:
-        client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
-    scores: Dict[str, float] = {}
-    for i, task_id in enumerate(TASKS, start=1):
-        scores[task_id] = run_episode(client, task_id, i)
-    easy = float(scores.get("rename_variables", 0.0))
-    medium = float(scores.get("remove_dead_code", 0.0))
-    hard = float(scores.get("full_refactor", 0.0))
-    avg_score = (easy + medium + hard) / 3.0
-    print(f"Easy: {easy:.4f}")
-    print(f"Medium: {medium:.4f}")
-    print(f"Hard: {hard:.4f}")
-    print(f"Final: {avg_score:.4f}")
-    sys.exit(0 if avg_score >= 0.5 else 1)
 if __name__ == "__main__":
-    main()

 API_BASE_URL = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
 MODEL_NAME = os.getenv("MODEL_NAME") or "gpt-4o-mini"
 HF_TOKEN = os.getenv("HF_TOKEN")
+ENV_URL: str = os.getenv("ENV_URL", "http://localhost:7860")
 LOCAL_IMAGE_NAME: str | None = os.getenv("LOCAL_IMAGE_NAME")
 TASKS: List[str] = ["rename_variables", "remove_dead_code", "full_refactor"]
 Respond ONLY with valid JSON (no markdown):
 {"action": <0-4>, "reason": "<one sentence>"}"""
+SAFE_FALLBACK_SCORES: Dict[str, float] = {
+    "easy": 0.0,
+    "medium": 0.0,
+    "hard": 0.0,
+    "final": 0.0,
+}
+def _safe_scores() -> Dict[str, float]:
+    return dict(SAFE_FALLBACK_SCORES)
 def _env_url() -> str:
+    # Never crash due to missing env var.
+    return str(ENV_URL or "http://localhost:7860").rstrip("/")
 def _post(path: str, payload: dict | None = None) -> dict:
+    try:
+        response = requests.post(f"{_env_url()}{path}", json=payload or {}, timeout=5)
+        response.raise_for_status()
+        return response.json()
+    except Exception:
+        print("Warning: Could not reach environment", file=sys.stderr)
+        return {}
 def _get(path: str) -> dict:
+    try:
+        response = requests.get(f"{_env_url()}{path}", timeout=5)
+        response.raise_for_status()
+        return response.json()
+    except Exception:
+        print("Warning: Could not reach environment", file=sys.stderr)
+        return {}
 def reset_env(task_id: str) -> dict:
 def grade(task_id: str, code: str) -> float:
+    try:
+        response = requests.post(
+            f"{_env_url()}/tasks/{task_id}/grade",
+            json={"code": code},
+            timeout=5,
+        )
+        response.raise_for_status()
+        return float(response.json().get("score", 0.0))
+    except Exception:
+        print("Warning: Could not reach environment", file=sys.stderr)
+        return 0.0
 def choose_action(client: Optional[OpenAI], state: dict, task_id: str) -> Tuple[int, str]:
     This is used by the FastAPI server to show live demo results on the Space.
     """
     try:
+        # Prefer local in-process execution when running inside the server (no ENV_URL needed).
+        try:
+            from acre.tasks.task_registry import TaskRegistry
+            from openenv_interface import OpenEnvRefactorEnv
+        except Exception:
+            TaskRegistry = None  # type: ignore[assignment]
+            OpenEnvRefactorEnv = None  # type: ignore[assignment]
+        registry = TaskRegistry() if TaskRegistry is not None else None
+        env = OpenEnvRefactorEnv(registry=registry) if OpenEnvRefactorEnv is not None else None
+        def _choose_action_name(code: str, task_id: str) -> int:
+            # Reuse the same heuristic logic (deterministic).
+            has_generic = re.search(r"\b(x|tmp|i)\b", code) is not None
+            has_if_false = re.search(r"\bif\s+False\b", code) is not None
+            has_if_true = re.search(r"\bif\s+True\b", code) is not None
+            has_append_loop = ".append(" in code and "for " in code
+            has_double_not = "not not" in code
+            has_add_call = "add(" in code
+            if task_id == "rename_variables":
+                if has_generic:
+                    return 0
+                if has_if_false or "unused" in code:
+                    return 1
+                if has_append_loop:
+                    return 2
+                if has_if_true or has_double_not:
+                    return 3
+                return 4
+            if task_id == "remove_dead_code":
+                if has_if_false or "unused" in code:
+                    return 1
+                if has_append_loop:
+                    return 2
+                if has_if_true or has_double_not:
+                    return 3
+                if has_generic:
+                    return 0
+                return 4
             if has_generic:
                 return 0
             if has_append_loop:
                 return 2
+            if has_if_false or has_if_true or has_double_not:
                 return 3
+            if has_add_call:
+                return 4
+            return 1
+        task_plan = [
+            "rename_variables",
+            "remove_dead_code",
+            "full_refactor",
+        ]
+        results: Dict[str, float] = _safe_scores()
+        scores: List[float] = []
+        # If we have a local env, use it. Otherwise fall back to HTTP.
+        if env is None or registry is None:
+            # Network safety: quick health probe before running.
+            try:
+                r = requests.get(f"{_env_url()}/health", timeout=5)
+                r.raise_for_status()
+            except Exception:
+                print("Warning: Could not reach environment", file=sys.stderr)
+                return _safe_scores()
+            for task_id in task_plan:
+                print(f"START {task_id}", flush=True)
+                reset_env(task_id)
+                for _ in range(5):
+                    state = get_state()
+                    action = _choose_action_name(str(state.get("current_code", "")), task_id)
+                    print(f"STEP {int(action)}", flush=True)
+                    step_env(action)
+                final_state = get_state()
+                score = float(grade(task_id, final_state.get("current_code", "")))
+                print(f"END {float(score):.4f}", flush=True)
+                scores.append(score)
+                if task_id == "rename_variables":
+                    results["easy"] = score
+                elif task_id == "remove_dead_code":
+                    results["medium"] = score
+                else:
+                    results["hard"] = score
+            results["final"] = float(sum(scores) / len(scores)) if scores else 0.0
             return results
         else:
+            # Local in-process execution (fast + no network recursion).
+            for task_id in task_plan:
+                print(f"START {task_id}", flush=True)
+                env.reset(seed=0, task_id=task_id)
+                for _ in range(5):
+                    st = env.state()
+                    code = str(st.current_code)
+                    action = int(_choose_action_name(code, task_id))
+                    print(f"STEP {int(action)}", flush=True)
+                    env.step(action)
+                st = env.state()
+                task = registry.get_task(task_id)
+                score = float(task.grade_against_expected(st.current_code)) if task is not None else 0.0
+                print(f"END {float(score):.4f}", flush=True)
+                scores.append(score)
+                if task_id == "rename_variables":
+                    results["easy"] = score
+                elif task_id == "remove_dead_code":
+                    results["medium"] = score
+                else:
+                    results["hard"] = score
+        results["final"] = float(sum(scores) / len(scores)) if scores else 0.0
+        return results
+    except Exception as e:
+        print(f"ERROR: {str(e)}", file=sys.stderr)
+        return _safe_scores()
 def main() -> None:
+    # Never crash. Always produce output.
+    result = run_all_tasks()
+    print(f"Easy: {float(result.get('easy', 0.0)):.4f}", file=sys.stderr)
+    print(f"Medium: {float(result.get('medium', 0.0)):.4f}", file=sys.stderr)
+    print(f"Hard: {float(result.get('hard', 0.0)):.4f}", file=sys.stderr)
+    print(f"Final: {float(result.get('final', 0.0)):.4f}", file=sys.stderr)
+    return None
 if __name__ == "__main__":
+    try:
+        run_all_tasks()
+    except Exception as e:
+        print(f"Fatal error: {e}", file=sys.stderr)