Openenv / sandbox /executor.py
vishaldhakad's picture
initial push
eda351c
Raw
History Blame Contribute Delete
4.04 kB
"""
sandbox/executor.py — Safe code execution via subprocess isolation.
Agent code is untrusted. Running it in-process risks:
- Infinite loops blocking the server
- File system access
- Network exfiltration
- Process termination
Solution: write code to a temp file, run in a child subprocess with a hard
timeout. Docker network policy blocks external network. Main process never crashes.
"""
import json
import os
import subprocess
import sys
import tempfile
from typing import Any, Dict, Optional
def safe_exec(
    code: str,
    test_input: str,
    timeout: int = 5,
    entry_fn: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Run agent code in a child Python process and report what it printed.

    The source is written to a temporary ``.py`` file, executed with a hard
    timeout, and the file is removed afterwards whatever the outcome.
    Failures of the executor itself (including failing to write the temp
    file) are reported as an ``executor_error`` result instead of being raised.

    Args:
        code: Python source code (may include harness wrapper).
        test_input: Only used when ``entry_fn`` is given: its ``repr`` becomes
            the single argument of the appended call.
        timeout: Hard kill timeout in seconds (default 5).
        entry_fn: If provided, a call ``entry_fn(test_input)`` is appended to
            the code and its return value is printed as ``{"result": ...}``.

    Returns:
        {"ok": True, "output": <parsed JSON or raw stdout>} when the child
        exits with code 0 (``{}`` as output when it printed nothing).
        {"ok": False, "error": <first 500 chars of stderr/stdout, TIMEOUT
        message, or "executor_error:<type>:<message>">} otherwise.
    """
    path = None
    try:
        # Writing happens inside the try so that an encoding or disk error is
        # reported as executor_error and the delete=False file is still removed.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            path = f.name
            f.write(code)
            if entry_fn:
                f.write("\nimport json, sys\n")
                f.write(f"result = {entry_fn}({test_input!r})\n")
                f.write('print(json.dumps({"result": result}))\n')

        proc = subprocess.run(
            # Same interpreter as the server; "python3" may be a different
            # installation or missing from PATH.
            [sys.executable or "python3", path],
            # Untrusted code must not read from (or block on) the server's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if proc.returncode != 0:
            return {"ok": False, "error": (proc.stderr or proc.stdout)[:500]}

        stdout = proc.stdout.strip()
        if not stdout:
            return {"ok": True, "output": {}}
        try:
            return {"ok": True, "output": json.loads(stdout)}
        except json.JSONDecodeError:
            # Not JSON: hand back the raw text and let the caller interpret it.
            return {"ok": True, "output": stdout}
    except subprocess.TimeoutExpired:
        return {"ok": False, "error": "TIMEOUT — code took too long to execute"}
    except Exception as e:
        return {"ok": False, "error": f"executor_error:{type(e).__name__}:{e}"}
    finally:
        if path is not None:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort cleanup; the file may already be gone.
                pass
def _parse_report(out: Any) -> Any:
    """Recover the wrapper's JSON report when agent code also printed to stdout."""
    if not isinstance(out, str):
        return out
    lines = out.strip().splitlines()
    if not lines:
        return out
    try:
        # The wrapper prints its report last, so it is the final stdout line.
        report = json.loads(lines[-1])
    except json.JSONDecodeError:
        return out
    return report if isinstance(report, dict) else out


def safe_run_tests(code: str, test_cases: list, timeout: int = 5) -> Dict[str, Any]:
    """
    Run structured test cases against agent code.

    For every test case a small harness is appended to ``code`` that calls
    ``run_task(<input>)``, compares the return value with the expected value
    and prints a one-line JSON report. The combined source is run through
    ``safe_exec``.

    Args:
        code: Agent source; the harness calls a function named ``run_task``.
        test_cases: Each test case: {"input": ..., "expected": ...}. Both
            values are embedded in the harness via ``repr``.
        timeout: Per-test timeout in seconds, forwarded to ``safe_exec``.

    Returns:
        {"passed": int, "total": int, "details": [...]}
        Each detail has "test" (index), "status" and "input"; "fail" entries
        add "got" and "expected" (plus "error" when ``run_task`` raised), and
        "error" entries (the run itself failed) add "error".
    """
    passed = 0
    details = []
    for i, tc in enumerate(test_cases):
        inp = tc.get("input")
        expected = tc.get("expected")
        wrapper = code + f"""
import json, sys
_inp = {repr(inp)}
try:
    _result = run_task(_inp)
    _ok = _result == {repr(expected)}
    print(json.dumps({{"result": str(_result)[:200], "ok": _ok, "expected": {repr(expected)}}}))
except Exception as e:
    print(json.dumps({{"result": None, "ok": False, "error": str(e)[:200], "expected": {repr(expected)}}}))
"""
        result = safe_exec(wrapper, str(inp), timeout=timeout)
        shown_input = str(inp)[:60]

        if not result["ok"]:
            details.append({
                "test": i, "status": "error",
                "input": shown_input,
                "error": str(result.get("error", ""))[:100],
            })
            continue

        # Agent prints make stdout multi-line, so safe_exec returns raw text;
        # pull the harness report back out of the last line.
        out = _parse_report(result["output"])

        if isinstance(out, dict) and out.get("ok"):
            passed += 1
            details.append({"test": i, "status": "pass", "input": shown_input})
            continue

        if isinstance(out, dict):
            # "result" is None when run_task raised, so stringify before slicing.
            got = str(out.get("result", "?"))[:60]
        else:
            got = str(out)[:60]
        failure = {
            "test": i, "status": "fail",
            "input": shown_input,
            "got": got,
            "expected": str(expected)[:60],
        }
        if isinstance(out, dict) and out.get("error"):
            # Surface the exception message captured by the harness.
            failure["error"] = str(out["error"])[:100]
        details.append(failure)
    return {"passed": passed, "total": len(test_cases), "details": details}