Spaces:

PRANAV05092003
/

autonomous-code-refactoring-env

Sleeping

App Files Files Community

PRANAV05092003 committed on Apr 8

Commit

8c9f7aa

1 Parent(s): 8d66fec

Added missing env module

Browse files

Files changed (4) hide show

acre/env/__init__.py +14 -0
acre/env/__pycache__/__init__.cpython-313.pyc +0 -0
acre/env/__pycache__/refactor_env.cpython-313.pyc +0 -0
acre/env/refactor_env.py +289 -0

acre/env/__init__.py ADDED Viewed

	@@ -0,0 +1,14 @@

+"""Environment package for ACRE; re-exports ``RefactorEnv`` as its public API."""
+from .refactor_env import RefactorEnv
+__all__ = ["RefactorEnv"]

acre/env/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (283 Bytes). View file

acre/env/__pycache__/refactor_env.cpython-313.pyc ADDED Viewed

Binary file (14.9 kB). View file

acre/env/refactor_env.py ADDED Viewed

	@@ -0,0 +1,289 @@

+from __future__ import annotations
+import math
+import re
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, Tuple
+import multiprocessing as mp
+import gymnasium as gym
+import numpy as np
+from acre.actions import transformations as tx
+from acre.datasets.code_samples import CodeSample, CodeSampleDataset
+try:
+    from radon.complexity import cc_visit
+except Exception:  # pragma: no cover
+    cc_visit = None  # type: ignore[assignment]
+@dataclass(frozen=True)
+class _ExecResult:
+    """Immutable outcome of one attempt to execute candidate code."""
+    # Status code of the run. Elsewhere in this module: 0 = ran cleanly,
+    # 1 = raised or produced no result, 2 = rejected by the banned-pattern
+    # check, 124 = killed after exceeding the timeout.
+    exit_code: int
+    # Measurements for the run; the executor stores elapsed seconds under "runtime_s".
+    metrics: Dict[str, Any]
+    # Short description of what went wrong, or None when nothing was reported.
+    error: Optional[str] = None
+# Regexes matched against the candidate source text before it is executed; any
+# hit makes the worker refuse to run the code. They are applied to the raw text,
+# so occurrences inside comments or string literals are rejected as well.
+# NOTE(review): a textual deny-list is best-effort only and is not a security
+# boundary; it merely filters the most obvious filesystem/process access.
+_BANNED_PATTERNS: Tuple[str, ...] = (
+    # Plain and comma-separated imports, e.g. `import os` or `import sys, os`.
+    r"\bimport\s+(?:[\w.]+\s*,\s*)*(?:os|subprocess|pathlib|shutil)\b",
+    # From-imports, e.g. `from os import system` or `from os.path import join`.
+    r"\bfrom\s+(?:os|subprocess|pathlib|shutil)\b",
+    # Dynamic import through the builtin, e.g. `__import__("os")`.
+    r"\b__import__\s*\(",
+    r"\bopen\s*\(",
+    r"\bos\.(remove|unlink|rmdir|removedirs|rename|replace|system|popen)\b",
+    r"\bshutil\.(rmtree|move|copy|copytree)\b",
+    r"\bsubprocess\.(run|Popen|call|check_call|check_output)\b",
+)
+def _exec_worker(src: str, fname: str, out_q: "mp.Queue[dict]") -> None:
+    """
+    Child-process entry point: run `src` and put exactly one result dict on `out_q`.
+    The dict has the keys "exit_code", "runtime_s" and "error". Exit code 2 means the
+    source matched a banned pattern and was not executed, 1 means compiling or running
+    it raised an exception, and 0 means it ran to completion.
+    Args:
+        src: Python source text to execute.
+        fname: File name passed to `compile` (shown in tracebacks).
+        out_q: Queue on which the single result dict is placed.
+    """
+    # Cap the error text. The parent joins this process before it reads the queue,
+    # and a very large message can keep the queue's feeder thread blocked on the
+    # pipe, so the child would never exit and the run would be misreported as a
+    # timeout.
+    max_error_chars = 4096
+    start = time.perf_counter()
+    def _report(exit_code: int, error: Optional[str]) -> None:
+        # Elapsed time is measured at the moment of reporting.
+        elapsed = time.perf_counter() - start
+        out_q.put({"exit_code": exit_code, "runtime_s": float(elapsed), "error": error})
+    try:
+        if any(re.search(p, src) for p in _BANNED_PATTERNS):
+            _report(2, "forbidden_operation")
+            return None
+        compiled = compile(src, fname, "exec")
+        # Fresh globals per run; "__main__" makes `if __name__ == "__main__":` blocks execute.
+        exec_globals: Dict[str, Any] = {"__name__": "__main__"}
+        exec(compiled, exec_globals, None)
+        _report(0, None)
+        return None
+    except Exception as exc:
+        _report(1, str(exc)[:max_error_chars])
+        return None
+class _InProcessExecutor:
+    """
+    Execute candidate code with a hard timeout to avoid hanging the server.
+    This is critical for deployment: the agent can easily generate `while True: ...`
+    or other long-running code. We treat timeout as an execution error.
+    Each call runs the code in a separate daemon process and waits for its result.
+    """
+    def run(self, code: str, *, filename: str = "<acre>", timeout_s: float = 0.25) -> _ExecResult:
+        """
+        Run `code` in a child process and return its outcome.
+        Args:
+            code: Python source text to execute.
+            filename: Name reported for the code in tracebacks.
+            timeout_s: Seconds to wait for the child (a floor of 0.01 s is applied).
+        Returns:
+            The worker's exit code, runtime and error text. Exit code 124 with error
+            "timeout" is returned if the child had to be stopped, and exit code 1 with
+            error "no result" if it exited without reporting anything.
+        """
+        q: "mp.Queue[dict]" = mp.Queue(maxsize=1)
+        # NOTE: on Windows, Process target must be picklable (top-level function).
+        proc = mp.Process(target=_exec_worker, args=(code, filename, q), daemon=True)
+        proc.start()
+        try:
+            proc.join(timeout=max(0.01, float(timeout_s)))
+            if proc.is_alive():
+                proc.terminate()
+                proc.join(timeout=0.1)
+                if proc.is_alive():
+                    # The child did not exit after terminate(); escalate so a
+                    # runaway worker is not left behind.
+                    proc.kill()
+                    proc.join(timeout=0.1)
+                return _ExecResult(exit_code=124, metrics={"runtime_s": float(timeout_s)}, error="timeout")
+            payload: dict = {}
+            try:
+                payload = q.get_nowait()
+            except Exception:
+                # Deliberately broad: an empty queue or any failure while reading the
+                # result is reported the same way.
+                payload = {"exit_code": 1, "runtime_s": 0.0, "error": "no result"}
+            return _ExecResult(
+                exit_code=int(payload.get("exit_code", 1)),
+                metrics={"runtime_s": float(payload.get("runtime_s", 0.0) or 0.0)},
+                error=payload.get("error"),
+            )
+        finally:
+            # A new queue is created on every call; close it here instead of
+            # leaving its cleanup to garbage collection.
+            q.close()
+class RefactorEnv(gym.Env):
+    """
+    Gymnasium environment in which every step applies one code transformation.
+    Actions are the integer ids listed in `ACTION_MEANINGS`. The observation is a
+    float32 vector `[len(code), complexity, runtime_s, error_flag]`, and an episode
+    ends once `MAX_STEPS` steps have been taken.
+    """
+    metadata = {"render_modes": []}
+    MAX_STEPS = 5
+    ACTION_MEANINGS: Dict[int, str] = {
+        0: "rename_variable",
+        1: "remove_dead_code",
+        2: "simplify_loop",
+        3: "optimize_condition",
+        4: "inline_function",
+    }
+    def __init__(self, *, dataset: Optional[CodeSampleDataset] = None, seed: Optional[int] = None) -> None:
+        """
+        Build the spaces, the sample dataset, the RNG and the executor.
+        Args:
+            dataset: Samples to draw from; a falsy value selects a one-sample default.
+            seed: Seed for the sampling RNG.
+        """
+        super().__init__()
+        self.action_space = gym.spaces.Discrete(5)
+        obs_low = np.zeros(4, dtype=np.float32)
+        # The last component is the 0/1 error flag, hence its upper bound of 1.
+        obs_high = np.array([1e9, 1e9, 1e9, 1.0], dtype=np.float32)
+        self.observation_space = gym.spaces.Box(low=obs_low, high=obs_high, dtype=np.float32)
+        if not dataset:
+            fallback_sample = CodeSample(
+                id="default",
+                language="python",
+                code="def f(x):\n    return x\n",
+            )
+            dataset = CodeSampleDataset([fallback_sample])
+        self.dataset: CodeSampleDataset = dataset
+        self._np_random, _ = gym.utils.seeding.np_random(seed)
+        self.executor = _InProcessExecutor()
+        # Per-episode state; populated by reset() and updated by step().
+        self._episode_steps = 0
+        self._sample: Optional[CodeSample] = None
+        self._code: str = ""
+        self._last_runtime_s: float = 0.0
+        self._last_error: bool = False
+        self._last_complexity: float = 0.0
+    def _compute_complexity(self, code: str) -> float:
+        """Return the summed radon complexity of `code`, or its line count as a fallback."""
+        if cc_visit is not None:
+            try:
+                blocks = cc_visit(code)
+                total = sum(getattr(block, "complexity", 0) for block in blocks) if blocks else 0.0
+                return float(total)
+            except Exception:
+                # radon could not analyse the code; use the line count below instead.
+                pass
+        return float(len(code.splitlines()))
+    def _compute_runtime(self, code: str) -> Tuple[float, bool, bool]:
+        """Execute `code` and return `(runtime_s, had_error, timed_out)`."""
+        outcome = self.executor.run(code, filename="env_exec.py", timeout_s=0.25)
+        elapsed = float(outcome.metrics.get("runtime_s", 0.0) or 0.0)
+        had_error = bool(outcome.exit_code != 0)
+        # Exit code 124 is what the executor reports for a timeout.
+        timed_out = bool(outcome.exit_code == 124)
+        return elapsed, had_error, timed_out
+    def _observation(self) -> np.ndarray:
+        """Pack the current code length, complexity, runtime and error flag into float32."""
+        features = (
+            float(len(self._code)),
+            float(self._last_complexity),
+            float(self._last_runtime_s),
+            float(int(self._last_error)),
+        )
+        return np.asarray(features, dtype=np.float32)
+    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
+        """
+        Start a new episode on a randomly drawn sample.
+        Args:
+            seed: When given, reseeds the sampling RNG.
+            options: Accepted for API compatibility; not read here.
+        Returns:
+            `(observation, info)`, where `info` holds the sample id, its language and
+            the step counter.
+        """
+        super().reset(seed=seed)
+        if seed is not None:
+            self._np_random, _ = gym.utils.seeding.np_random(seed)
+        # An empty dataset falls back to a single empty-code sample.
+        candidates = list(self.dataset) or [CodeSample(id="empty", language="python", code="")]
+        pick = int(self._np_random.integers(0, len(candidates)))
+        self._sample = candidates[pick]
+        self._code = str(self._sample.code)
+        self._episode_steps = 0
+        self._last_complexity = self._compute_complexity(self._code)
+        self._last_runtime_s, self._last_error, _ = self._compute_runtime(self._code)
+        info = {
+            "sample_id": getattr(self._sample, "id", None),
+            "language": getattr(self._sample, "language", None),
+            "episode_steps": self._episode_steps,
+        }
+        return self._observation(), info
+    def step(self, action: int):
+        """
+        Apply the transformation selected by `action` and score the result.
+        Returns:
+            `(observation, raw_reward, terminated, truncated, info)`. `info` carries the
+            individual reward components and the reward rescaled into [0, 1].
+        Raises:
+            ValueError: If `action` is not a key of `ACTION_MEANINGS`.
+        """
+        action_i = int(action)
+        action_name = self.ACTION_MEANINGS.get(action_i)
+        if action_name is None:
+            raise ValueError(f"Invalid action {action_i}; expected 0..4")
+        before_complexity = float(self._last_complexity)
+        before_runtime = float(self._last_runtime_s)
+        had_error = bool(self._last_error)
+        # The helpers in `tx` carry the same names as the action labels.
+        transform = getattr(tx, action_name)(self._code)
+        self._code = transform.code
+        self._episode_steps += 1
+        self._last_complexity = self._compute_complexity(self._code)
+        self._last_runtime_s, self._last_error, is_timeout = self._compute_runtime(self._code)
+        has_error = bool(self._last_error)
+        changed = bool(transform.changed)
+        # Relative improvements; the denominators are floored to avoid division by zero.
+        complexity_gain = (before_complexity - float(self._last_complexity)) / max(before_complexity, 1.0)
+        runtime_gain = (before_runtime - float(self._last_runtime_s)) / max(before_runtime, 1e-6)
+        # Penalize execution errors strongly; timeouts even more strongly.
+        timeout_penalty = -2.0 if is_timeout else 0.0
+        error_penalty = -1.0 if has_error else 0.0
+        if changed:
+            change_bonus, no_change_penalty = 0.05, 0.0
+        else:
+            change_bonus, no_change_penalty = 0.0, -0.02
+        raw_reward = float(
+            2.0 * complexity_gain
+            + 0.25 * runtime_gain
+            + error_penalty
+            + timeout_penalty
+            + change_bonus
+            + no_change_penalty
+        )
+        # Extra +/-0.5 when the error state flips: breaking working code costs,
+        # repairing broken code pays.
+        if had_error != has_error:
+            raw_reward += 0.5 if had_error else -0.5
+        # Normalize exactly as declared in openenv.yaml (clip to [0,1]).
+        scaled = float((raw_reward + 32.0) / 52.0)
+        normalized_reward = 0.0 if scaled < 0.0 else (1.0 if scaled > 1.0 else scaled)
+        # NOTE(review): the step limit is reported as `terminated`; confirm that
+        # `truncated` is not what consumers expect for a time limit.
+        terminated = bool(self._episode_steps >= int(self.MAX_STEPS))
+        truncated = False
+        reward_components = {
+            "complexity_gain": float(complexity_gain),
+            "runtime_gain": float(runtime_gain),
+            "error_penalty": float(error_penalty),
+            "timeout_penalty": float(timeout_penalty),
+            "change_bonus": float(change_bonus),
+            "no_change_penalty": float(no_change_penalty),
+        }
+        info: Dict[str, Any] = {
+            "action_name": action_name,
+            "changed": changed,
+            "transform": dict(transform.metadata),
+            "reward_components": reward_components,
+            "normalized_reward": normalized_reward,
+            "episode_steps": int(self._episode_steps),
+            "timeout": bool(is_timeout),
+        }
+        # NOTE(review): the raw reward is returned; the normalized value is only
+        # exposed through `info` - confirm this is intended.
+        return self._observation(), raw_reward, terminated, truncated, info
+    def state(self) -> Dict[str, Any]:
+        """Return a plain-dict snapshot of the current episode state."""
+        sample = self._sample
+        snapshot: Dict[str, Any] = {
+            "current_code": self._code,
+            "episode_steps": int(self._episode_steps),
+            "max_steps": int(self.MAX_STEPS),
+            "complexity": float(self._last_complexity),
+            "last_runtime": float(self._last_runtime_s),
+            "last_error": bool(self._last_error),
+            # getattr with a default also yields None while no sample is loaded.
+            "sample_id": getattr(sample, "id", None),
+            "language": getattr(sample, "language", None),
+            "observation": self._observation().tolist(),
+            "action_meanings": dict(self.ACTION_MEANINGS),
+        }
+        return snapshot
+    def render(self) -> None:
+        """No-op: `metadata` declares no render modes."""
+        return None
+    def close(self) -> None:
+        """No-op: nothing is released here."""
+        return None