"""
sandbox/executor.py — Safe code execution via subprocess isolation.

Agent code is untrusted. Running it in-process risks:
  - Infinite loops blocking the server
  - File system access
  - Network exfiltration
  - Process termination

Solution: write code to a temp file, run in a child subprocess with a hard
timeout. Docker network policy blocks external network. Main process never
crashes.
"""

import subprocess
import tempfile
import os
import json
from typing import Any, Dict, Optional


def _last_json_object(text: str) -> Optional[Dict[str, Any]]:
    """Return the JSON object found on the last non-blank line of *text*.

    Returns None when there is no such line, when it is not valid JSON, or
    when it decodes to something other than a JSON object.
    """
    lines = [line for line in text.splitlines() if line.strip()]
    if not lines:
        return None
    try:
        parsed = json.loads(lines[-1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return parsed if isinstance(parsed, dict) else None


def safe_exec(
    code: str,
    test_input: str,
    timeout: int = 5,
    entry_fn: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Run agent code in a child ``python3`` process with a hard timeout.

    Args:
        code: Python source code (may include harness wrapper).
        test_input: Only used when ``entry_fn`` is given, in which case it is
            passed as the single argument to that function; otherwise unused.
        timeout: Hard kill timeout in seconds (default 5).
        entry_fn: If provided, a call to this function is appended to the
            code and its return value is printed as ``{"result": ...}`` JSON.

    Returns:
        {"ok": True, "output": <stdout parsed as JSON, the raw stripped
            stdout when it is not JSON, or {} when stdout is empty>}
        {"ok": False, "error": <first 500 chars of stderr/stdout, a timeout
            message, or "executor_error:<ExceptionType>:<message>">}

    This function does not raise: every failure, including a failure to
    write the temp file, is reported through the ``ok: False`` form.

    NOTE: the child shares stdout with the agent code, so agent code can
    print arbitrary text (including JSON). Do not treat ``output`` as
    trustworthy beyond what the caller can verify itself.
    """
    path: Optional[str] = None
    try:
        # The file is created inside the try block so that a failed write
        # (e.g. source that cannot be encoded as UTF-8) is reported as an
        # error and the file is still removed in ``finally``.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            path = f.name
            f.write(code)
            if entry_fn:
                f.write("\nimport json, sys\n")
                f.write(f"result = {entry_fn}({test_input!r})\n")
                f.write('print(json.dumps({"result": result}))\n')

        proc = subprocess.run(
            ["python3", path],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        stdout = proc.stdout.strip()

        if proc.returncode != 0:
            message = (proc.stderr or proc.stdout)[:500]
            # A silent non-zero exit would otherwise yield an empty error.
            return {
                "ok": False,
                "error": message or f"process exited with code {proc.returncode}",
            }

        if not stdout:
            return {"ok": True, "output": {}}

        try:
            return {"ok": True, "output": json.loads(stdout)}
        except json.JSONDecodeError:
            pass

        if entry_fn:
            # The appended harness prints its JSON last; anything the agent
            # code printed before it must not hide the result.
            harness_out = _last_json_object(stdout)
            if harness_out is not None and "result" in harness_out:
                return {"ok": True, "output": harness_out}

        return {"ok": True, "output": stdout}

    except subprocess.TimeoutExpired:
        return {"ok": False, "error": "TIMEOUT — code took too long to execute"}
    except Exception as e:
        # Boundary handler: the caller must never see an exception.
        return {"ok": False, "error": f"executor_error:{type(e).__name__}:{e}"}
    finally:
        if path is not None:
            try:
                os.unlink(path)
            except OSError:
                pass  # best-effort cleanup; file may already be gone


def safe_run_tests(code: str, test_cases: list, timeout: int = 5) -> Dict[str, Any]:
    """
    Run structured test cases against agent code.

    The agent code must define ``run_task(inp)``. For every case a small
    harness is appended that calls ``run_task`` with the case input, compares
    the return value to the expected value with ``==`` and prints one JSON
    line; the combined source is executed through ``safe_exec``.

    Each test case: {"input": ..., "expected": ...}. Both values are embedded
    in the generated source via ``repr()``, so they must have a ``repr`` that
    evaluates back to an equal value (plain literals and containers do).

    Args:
        code: Agent source code defining ``run_task``.
        test_cases: List of test-case dicts as described above.
        timeout: Per-case timeout in seconds passed to ``safe_exec``.

    Returns:
        {"passed": int, "total": int, "details": [...]} where each detail is
        one of:
          {"test", "status": "pass", "input"}
          {"test", "status": "fail", "input", "got", "expected"} plus
              "error" when ``run_task`` raised inside the harness
          {"test", "status": "error", "input", "error"} when the child
              process itself failed (non-zero exit, timeout, ...)
    """
    passed = 0
    details = []

    for i, tc in enumerate(test_cases):
        inp = tc.get("input")
        expected = tc.get("expected")

        # Only a stringified copy of ``expected`` goes into the JSON output,
        # so expectations that are not JSON-serialisable (e.g. a set) no
        # longer crash the harness. ``bool()`` keeps ``ok`` serialisable too.
        expected_text = str(expected)[:200]
        wrapper = code + f"""
import json, sys
_inp = {inp!r}
try:
    _result = run_task(_inp)
    _ok = bool(_result == {expected!r})
    print(json.dumps({{"result": str(_result)[:200], "ok": _ok, "expected": {expected_text!r}}}))
except Exception as e:
    print(json.dumps({{"result": None, "ok": False, "error": str(e)[:200], "expected": {expected_text!r}}}))
"""
        result = safe_exec(wrapper, str(inp), timeout=timeout)
        input_text = str(inp)[:60]

        if not result["ok"]:
            details.append({
                "test": i,
                "status": "error",
                "input": input_text,
                "error": result.get("error", "")[:100],
            })
            continue

        out = result["output"]
        if isinstance(out, str):
            # Agent code printed extra text, so stdout as a whole was not
            # JSON; the harness verdict is still on the last line.
            verdict = _last_json_object(out)
            if verdict is not None and "ok" in verdict:
                out = verdict

        if isinstance(out, dict) and out.get("ok"):
            passed += 1
            details.append({"test": i, "status": "pass", "input": input_text})
            continue

        if isinstance(out, dict):
            # "result" is None when run_task raised; stringify before slicing.
            got = str(out.get("result", "?"))[:60]
        else:
            got = str(out)[:60]
        detail = {
            "test": i,
            "status": "fail",
            "input": input_text,
            "got": got,
            "expected": str(expected)[:60],
        }
        if isinstance(out, dict) and out.get("error") is not None:
            detail["error"] = str(out["error"])[:100]
        details.append(detail)

    return {"passed": passed, "total": len(test_cases), "details": details}