codesensei-env / env /server /sandbox.py
vineetshukla.work@gmail.com
feat: CodeSensei - GRPO-trained LLM code debugger on OpenEnv
c47c81c
"""
CodeSensei — Restricted Python Sandbox.
Executes untrusted Python code (LLM-generated fixes) in a subprocess
with a strict timeout. Network access is only discouraged on a
best-effort basis (proxy environment variables); it is not enforced.
Returns stdout, stderr, and success status.
"""
import subprocess
import sys
import tempfile
import os
from typing import Tuple
# Maximum execution time in seconds
SANDBOX_TIMEOUT = 5

# Python preamble injected before user code to disable dangerous operations.
#
# Order matters inside the preamble:
#   1. Every allow-listed module is imported first. Loading a pure-Python
#      module needs the `exec`/`compile` builtins and pulls in helper modules
#      that are not on the allow-list, so this has to happen before anything
#      is restricted. Afterwards the modules are served from the import cache.
#   2. The dangerous builtins are removed.
#   3. `__import__` is replaced by an allow-list check.
#
# Everything runs inside a function that is deleted afterwards, so the
# original `__import__` and the helper names are not left lying around in the
# namespace shared with the user code. This is best effort only: it is not a
# security boundary against deliberately hostile code.
#
# NOTE(review): library code that itself calls exec/eval at runtime (e.g. the
# dataclasses decorator or collections.namedtuple) will presumably still fail
# once those builtins are removed -- confirm whether that is intended.
_PREAMBLE = """
def _install_sandbox():
    import builtins
    import importlib

    # Allow only safe imports via restricted __import__
    safe_modules = frozenset([
        'math', 'collections', 'itertools', 'functools', 'operator',
        'string', 're', 'heapq', 'bisect', 'copy', 'typing',
        'dataclasses', 'enum', 'json', 'datetime', 'random',
    ])

    # Warm the import cache while the import machinery is still intact
    for module_name in sorted(safe_modules):
        try:
            importlib.import_module(module_name)
        except ImportError:
            pass

    # Block dangerous builtins
    for blocked in ('exec', 'eval', 'compile', 'open',
                    'breakpoint', 'exit', 'quit'):
        if hasattr(builtins, blocked):
            delattr(builtins, blocked)

    original_import = builtins.__import__

    def restricted_import(name, *args, **kwargs):
        if name.split('.')[0] not in safe_modules:
            raise ImportError(f"Module '{name}' is not allowed in sandbox")
        return original_import(name, *args, **kwargs)

    builtins.__import__ = restricted_import


_install_sandbox()
del _install_sandbox
"""
def execute_code(code: str, timeout: int = SANDBOX_TIMEOUT) -> Tuple[str, str, bool]:
    """Execute Python code in a separate subprocess with a timeout.

    The code is written to a temporary ``.py`` file, run with the current
    interpreter, and the temporary file is removed afterwards.

    Args:
        code: Python source code to execute.
        timeout: Maximum execution time in seconds.

    Returns:
        Tuple of (stdout, stderr, success).
        success is True if return code == 0 and no timeout.
        On timeout or on any failure to launch the script, stdout is empty
        and stderr carries a description of the problem.
    """
    temp_path = None
    try:
        # Write code to a temporary file. This happens inside the try block so
        # that a failed write (e.g. un-encodable text) is reported through the
        # normal return value and the file is still cleaned up.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            temp_path = f.name
            f.write(code)

        result = subprocess.run(
            [sys.executable, "-u", temp_path],
            # Do not let the child read from (or block on) this process's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # Decode with the same codec the child is told to use below, and
            # never fail on undecodable bytes.
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
            env={
                **os.environ,
                "PYTHONIOENCODING": "utf-8",
                # Disable network access hints (best effort on Windows/Linux)
                "no_proxy": "*",
                "HTTP_PROXY": "http://0.0.0.0:0",
                "HTTPS_PROXY": "http://0.0.0.0:0",
            },
        )
        return result.stdout, result.stderr, result.returncode == 0
    except subprocess.TimeoutExpired:
        return "", f"Execution timed out after {timeout} seconds", False
    except Exception as e:
        # Boundary handler: callers always get the (stdout, stderr, success)
        # tuple rather than an exception.
        return "", f"Sandbox error: {str(e)}", False
    finally:
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
def run_function_with_tests(
    function_code: str, test_code: str, timeout: int = SANDBOX_TIMEOUT
) -> Tuple[str, str, bool]:
    """Run a function definition together with its test code.

    The sandbox preamble, the function source and the test source are
    stitched into one script, followed by a final ``print`` of the
    ``ALL_TESTS_PASSED`` marker, and the script is handed to
    ``execute_code``.

    Args:
        function_code: Python function definition(s).
        test_code: Python test code that calls the function and uses assert.
        timeout: Maximum execution time in seconds.

    Returns:
        Tuple of (stdout, stderr, success).
    """
    script_sections = (
        f"{_PREAMBLE}",
        "# --- Function Under Test ---",
        f"{function_code}",
        "# --- Test Cases ---",
        f"{test_code}",
        'print("ALL_TESTS_PASSED")',
        "",  # yields the trailing newline after the marker line
    )
    assembled_script = "\n".join(script_sections)
    return execute_code(assembled_script, timeout)
def check_syntax(code: str) -> Tuple[bool, str]:
    """Check if Python code has valid syntax without executing it.

    Args:
        code: Python source code to check.

    Returns:
        Tuple of (is_valid, error_message). error_message is empty when the
        code compiles; otherwise it describes why compilation failed.
    """
    try:
        # dont_inherit=True keeps any __future__ flags of this module from
        # leaking into how the proposed code is compiled.
        compile(code, "<proposed_fix>", "exec", dont_inherit=True)
    except SyntaxError as e:
        if e.lineno is None:
            # Some syntax errors carry no position; avoid "line None".
            return False, f"SyntaxError: {e.msg}"
        return False, f"SyntaxError at line {e.lineno}: {e.msg}"
    except (ValueError, RecursionError) as e:
        # compile() raises ValueError for null bytes on older interpreters and
        # RecursionError for excessively nested source; report both as invalid
        # input instead of letting them escape.
        return False, f"Invalid source: {e}"
    return True, ""