Spaces:

ayussssssiiii
/

codecourt

Sleeping

App Files Files Community

codecourt / oracle /executor.py

ayussssssiiii

Initial HF Space snapshot

fcb838d about 1 month ago

raw

history blame contribute delete

15.9 kB

	"""
	Oracle Executor — Docker-backed sandbox for untrusted code execution.

	Supports Python and C++ submissions, enforces time and memory limits, and
	returns both CodeCourt-native status fields and hackathon-facing outcome data.
	"""

	from __future__ import annotations

	import contextlib
	import io
	import os
	import platform
	import resource
	import shlex
	import signal
	import subprocess
	import tarfile
	import tempfile
	import time
	from dataclasses import dataclass
	from typing import Optional

	try:
	    import docker
	    from docker.errors import DockerException
	except ImportError:  # pragma: no cover - handled at runtime when dependency is missing
	    docker = None
	    DockerException = Exception


	# Per-language sandbox settings, keyed by language name:
	#   image       - Docker image the steps run in
	#   source_name - file name the submission is stored under in /workspace
	#   compile_cmd - argv of the build step, or None when there is none
	#   run_cmd     - argv of the run step; its script feeds /workspace/stdin.txt
	#                 to the program on standard input
	LANGUAGE_CONFIG = {
	    "python": dict(
	        image="python:3.11-alpine",
	        source_name="main.py",
	        run_cmd=["sh", "-lc", "python3 /workspace/main.py < /workspace/stdin.txt"],
	        compile_cmd=None,
	    ),
	    "cpp": dict(
	        image="gcc:13",
	        source_name="main.cpp",
	        compile_cmd=["sh", "-lc", "g++ -O2 -std=c++17 /workspace/main.cpp -o /workspace/main"],
	        run_cmd=["sh", "-lc", "/workspace/main < /workspace/stdin.txt"],
	    ),
	}


	@dataclass
	class ExecutionResult:
	    """
	    Result of executing one submission against one input.

	    Holds the CodeCourt-native ``status`` next to the hackathon-facing
	    ``outcome`` for the same execution.
	    """

	    # One of: 'pass' | 'fail' | 'tle' | 'mle' | 'error'
	    status: str
	    stdout: str
	    stderr: str
	    timed_out: bool
	    memory_exceeded: bool
	    execution_time: float
	    expected_output: Optional[str] = None
	    memory_used_mb: float = 0.0
	    language: str = "python"
	    # One of: 'solver_wins' | 'setter_wins' | 'compile_error' | 'time_limit'
	    outcome: str = "setter_wins"
	    compile_error: bool = False

	    @property
	    def passed(self) -> bool:
	        """Whether ``status`` is exactly ``"pass"``."""
	        is_pass = self.status == "pass"
	        return is_pass


	class OracleExecutor:
	    """
	    Sandbox for executing untrusted code, backed by Docker when available.

	    With a reachable Docker daemon, every step runs in its own container with:
	    - network disabled
	    - memory cap
	    - CPU quota
	    - no-new-privileges and all capabilities dropped
	    - read-only root filesystem (only the /workspace volume and a /tmp
	      tmpfs are writable)

	    When the ``SPACE_ID`` environment variable is set, or Docker cannot be
	    used, steps run as local subprocesses instead. That fallback applies only
	    rlimits and a wall-clock timeout; it is NOT an isolation boundary.

	    The compile step and the run step of one ``run`` call share a single
	    workspace, so build artifacts are visible to the run step.
	    """

	    def __init__(
	        self,
	        time_limit: float = 2.0,
	        memory_limit_mb: int = 256,
	        default_language: str = "python",
	    ):
	        """
	        Args:
	            time_limit: Time allowed per step, in seconds.
	            memory_limit_mb: Memory cap per step, in megabytes.
	            default_language: Language used when ``run`` is not given one.
	        """
	        self.time_limit = time_limit
	        self.memory_limit_mb = memory_limit_mb
	        self.default_language = default_language
	        self._client = None
	        self._docker_checked = False
	        self._docker_available = False

	    def _get_client(self):
	        """
	        Return a usable Docker client, or None when Docker cannot be used.

	        The daemon is probed once per instance and the outcome is cached.
	        """
	        if docker is None:
	            return None
	        if not self._docker_checked:
	            self._docker_checked = True
	            if self._client is not None:
	                # A client assigned from outside is used as-is; mark it
	                # available so later calls keep returning it.
	                self._docker_available = True
	            else:
	                client = None
	                try:
	                    client = docker.from_env()
	                    client.ping()
	                except DockerException:
	                    if client is not None:
	                        try:
	                            client.close()
	                        except Exception:
	                            # Best-effort cleanup of a client that is being
	                            # discarded anyway.
	                            pass
	                    client = None
	                self._client = client
	                self._docker_available = client is not None
	        return self._client if self._docker_available else None

	    def _use_local_fallback(self) -> bool:
	        """Tell whether steps run as local subprocesses instead of containers."""
	        return "SPACE_ID" in os.environ or self._get_client() is None

	    def _wall_timeout(self) -> float:
	        """Wall-clock budget of one step: time limit + 0.5 s, at least 1 s."""
	        return max(self.time_limit + 0.5, 1.0)

	    @contextlib.contextmanager
	    def _open_workspace(self):
	        """
	        Provide a workspace that outlives a single step.

	        Yields a temporary directory path in local mode, or the name of a
	        freshly created Docker volume otherwise. The workspace is removed on
	        exit. Pass the yielded value as ``workspace`` to ``_run_container``.
	        """
	        if self._use_local_fallback():
	            with tempfile.TemporaryDirectory() as tmpdir:
	                yield tmpdir
	            return

	        volume = self._get_client().volumes.create()
	        try:
	            yield volume.name
	        finally:
	            try:
	                volume.remove(force=True)
	            except DockerException:
	                # Cleanup is best-effort; a leftover volume must not turn a
	                # finished execution into a failure.
	                pass

	    def _validate_language(self, language: str) -> dict:
	        """
	        Return the ``LANGUAGE_CONFIG`` entry for ``language``.

	        Raises:
	            ValueError: If ``language`` has no entry in ``LANGUAGE_CONFIG``.
	        """
	        if language not in LANGUAGE_CONFIG:
	            raise ValueError(f"Unsupported language: {language}. Expected one of {sorted(LANGUAGE_CONFIG)}")
	        return LANGUAGE_CONFIG[language]

	    def _create_workspace_archive(self, files: dict[str, str]) -> bytes:
	        """
	        Pack ``files`` (name -> text) into an uncompressed tar archive.

	        Contents are encoded as UTF-8 and stored with mode 0o644.
	        """
	        buffer = io.BytesIO()
	        with tarfile.open(fileobj=buffer, mode="w") as tar:
	            for filename, content in files.items():
	                encoded = content.encode("utf-8")
	                info = tarfile.TarInfo(name=filename)
	                info.size = len(encoded)
	                info.mode = 0o644
	                tar.addfile(info, io.BytesIO(encoded))
	        return buffer.getvalue()

	    def _read_peak_memory_mb(self, container) -> float:
	        """
	        Return the container's reported memory usage in MiB, or 0.0.

	        Reads ``memory_stats.max_usage`` and falls back to
	        ``memory_stats.usage``; any Docker error yields 0.0.
	        NOTE(review): this is called after the container has stopped, so the
	        daemon may report no usage at all - confirm the value is meaningful.
	        """
	        try:
	            stats = container.stats(stream=False)
	        except DockerException:
	            return 0.0

	        memory_usage = 0
	        if isinstance(stats, dict):
	            memory_stats = stats.get("memory_stats", {})
	            memory_usage = memory_stats.get("max_usage") or memory_stats.get("usage") or 0
	        return round(memory_usage / (1024 * 1024), 3)

	    def _status_from_exit(self, exit_code: int, expected_output: Optional[str], stdout: str, stderr: str) -> tuple[str, str]:
	        """
	        Map a finished step to ``(status, outcome)``.

	        - exit code 137 (128 + SIGKILL) is reported as a memory-limit hit
	        - any other non-zero exit is an error; it counts as ``compile_error``
	          when stderr mentions "syntaxerror" or "traceback" (case-insensitive)
	        - exit 0 passes when no expected output is given or when stdout
	          equals it after stripping surrounding whitespace
	        """
	        if exit_code == 137:
	            return "mle", "setter_wins"
	        if exit_code != 0:
	            # NOTE(review): "traceback" also matches ordinary Python runtime
	            # errors, which are therefore labelled compile_error. Kept as-is
	            # because callers may rely on it - confirm this is intended.
	            lowered = stderr.lower()
	            compile_like = "syntaxerror" in lowered or "traceback" in lowered
	            return ("error", "compile_error" if compile_like else "setter_wins")
	        if expected_output is None or stdout.strip() == expected_output.strip():
	            return "pass", "solver_wins"
	        return "fail", "setter_wins"

	    def _set_local_limits(self):
	        """
	        Apply address-space and CPU-time rlimits to the current process.

	        Used as ``preexec_fn`` for local subprocesses. Does nothing on macOS.
	        Each limit is clamped to the inherited hard limit, because an
	        unprivileged process cannot raise a hard limit and the failed
	        ``setrlimit`` would abort the child before it starts.
	        """
	        if platform.system() == "Darwin":
	            return
	        wanted_limits = (
	            (resource.RLIMIT_AS, self.memory_limit_mb * 1024 * 1024),
	            (resource.RLIMIT_CPU, int(self.time_limit) + 1),
	        )
	        for kind, wanted in wanted_limits:
	            _, hard = resource.getrlimit(kind)
	            if hard != resource.RLIM_INFINITY:
	                wanted = min(wanted, hard)
	            resource.setrlimit(kind, (wanted, wanted))

	    def _run_local(
	        self,
	        command: list[str],
	        files: dict[str, str],
	        workdir: Optional[str] = None,
	    ) -> tuple[int, str, str, float, float, bool]:
	        """
	        Run one step as a local subprocess (fallback when Docker is not used).

	        SECURITY: the submission runs on the host through a shell with only
	        rlimits and a timeout applied; there is no filesystem or network
	        isolation.

	        Args:
	            command: ``["sh", "-lc", script]`` argv. Only ``script`` is used;
	                every ``/workspace`` in it is rewritten to the working
	                directory.
	            files: Mapping of file name to text, written before running.
	            workdir: Existing directory to run in. When omitted, a temporary
	                directory is created and deleted around this single step.

	        Returns:
	            ``(exit_code, stdout, stderr, elapsed_seconds, memory_mb, timed_out)``.
	            ``exit_code`` is 124 on timeout, and death by signal N is
	            reported as ``128 + N`` (the shell convention, so SIGKILL is 137
	            whether or not the shell exec'd the program). ``memory_mb`` is a
	            fixed 10.0 placeholder: usage is not measured in this mode.
	        """
	        if workdir is None:
	            with tempfile.TemporaryDirectory() as tmpdir:
	                return self._run_local(command, files, workdir=tmpdir)

	        for filename, content in files.items():
	            with open(os.path.join(workdir, filename), "w", encoding="utf-8") as handle:
	                handle.write(content)

	        # Quote the path so a temp directory containing spaces or shell
	        # metacharacters cannot break or alter the script.
	        script = command[2].replace("/workspace", shlex.quote(workdir))
	        limiter = None if platform.system() == "Darwin" else self._set_local_limits

	        started = time.monotonic()
	        proc = subprocess.Popen(
	            script,
	            shell=True,
	            cwd=workdir,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            text=True,
	            encoding="utf-8",
	            # Untrusted programs may print invalid UTF-8; never let that
	            # raise out of the judge (matches the Docker path).
	            errors="replace",
	            preexec_fn=limiter,
	            # Own session => own process group, so a timeout can kill the
	            # shell and everything it spawned.
	            start_new_session=True,
	        )
	        try:
	            stdout, stderr = proc.communicate(timeout=self._wall_timeout())
	        except subprocess.TimeoutExpired:
	            try:
	                os.killpg(proc.pid, signal.SIGKILL)
	            except ProcessLookupError:
	                pass  # the group already exited
	            # Killing only the shell would leave grandchildren holding the
	            # pipes open and block here; the group kill above prevents that.
	            stdout, stderr = proc.communicate()
	            elapsed = time.monotonic() - started
	            return 124, (stdout or "").strip(), (stderr or "").strip(), elapsed, 10.0, True

	        elapsed = time.monotonic() - started
	        exit_code = proc.returncode
	        if exit_code < 0:
	            exit_code = 128 - exit_code
	        stderr = stderr.strip()
	        if exit_code != 0 and not stderr:
	            stderr = f"Exit code: {exit_code}"
	        return exit_code, stdout.strip(), stderr, elapsed, 10.0, False

	    def _run_container(
	        self,
	        image: str,
	        command: list[str],
	        files: dict[str, str],
	        workspace: Optional[str] = None,
	    ) -> tuple[int, str, str, float, float, bool]:
	        """
	        Run one step in the sandbox and collect its result.

	        Delegates to ``_run_local`` in fallback mode. Otherwise creates a
	        locked-down container, uploads ``files`` to /workspace, runs
	        ``command`` and always removes the container afterwards.

	        Args:
	            image: Docker image to run (unused in local mode).
	            command: argv executed inside the sandbox.
	            files: Mapping of file name to text placed in /workspace.
	            workspace: Value yielded by ``_open_workspace``; lets several
	                steps see each other's files. When omitted, a workspace is
	                created and discarded around this single step.

	        Returns:
	            ``(exit_code, stdout, stderr, elapsed_seconds, memory_mb, timed_out)``;
	            ``exit_code`` is 124 when the wall-clock budget was exceeded.

	        Raises:
	            DockerException: If the Docker daemon rejects an operation.
	        """
	        if workspace is None:
	            with self._open_workspace() as scratch:
	                return self._run_container(image, command, files, workspace=scratch)

	        if self._use_local_fallback():
	            return self._run_local(command, files, workdir=workspace)

	        client = self._get_client()
	        container = None
	        try:
	            container = client.containers.create(
	                image=image,
	                command=command,
	                working_dir="/workspace",
	                mem_limit=f"{self.memory_limit_mb}m",
	                network_disabled=True,
	                read_only=True,
	                # With a read-only root filesystem, files can only be
	                # uploaded into (and written under) a mounted volume.
	                volumes={workspace: {"bind": "/workspace", "mode": "rw"}},
	                # Compilers need a writable temp directory.
	                tmpfs={"/tmp": "size=64m"},
	                nano_cpus=1_000_000_000,
	                security_opt=["no-new-privileges:true"],
	                cap_drop=["ALL"],
	                pids_limit=128,
	                detach=True,
	                stdin_open=False,
	                tty=False,
	            )
	            container.put_archive("/workspace", self._create_workspace_archive(files))

	            # Time only the execution itself, not container creation, the
	            # upload, or log retrieval.
	            started = time.monotonic()
	            container.start()

	            timed_out = False
	            try:
	                result = container.wait(timeout=self._wall_timeout())
	            except Exception:
	                # Kept broad from the original: the wait timeout appears to
	                # be raised by the HTTP layer rather than as a
	                # DockerException - NOTE(review): confirm and narrow.
	                timed_out = True
	                result = {"StatusCode": 124}
	                try:
	                    container.kill()
	                except DockerException:
	                    # The container may have exited in the meantime; the
	                    # step is still reported as timed out.
	                    pass
	            elapsed = time.monotonic() - started

	            stdout = container.logs(stdout=True, stderr=False).decode("utf-8", errors="replace").strip()
	            stderr = container.logs(stdout=False, stderr=True).decode("utf-8", errors="replace").strip()
	            peak_memory_mb = self._read_peak_memory_mb(container)
	            return int(result.get("StatusCode", 1)), stdout, stderr, elapsed, peak_memory_mb, timed_out
	        finally:
	            if container is not None:
	                try:
	                    container.remove(force=True)
	                except DockerException:
	                    pass

	    def _error_result(self, message: str, expected_output: Optional[str], language: str) -> "ExecutionResult":
	        """Build the result used when a submission could not be executed at all."""
	        return ExecutionResult(
	            status="error",
	            stdout="",
	            stderr=message,
	            timed_out=False,
	            memory_exceeded=False,
	            execution_time=0.0,
	            expected_output=expected_output,
	            memory_used_mb=0.0,
	            language=language,
	            outcome="compile_error",
	            compile_error=True,
	        )

	    def run(
	        self,
	        code: str,
	        stdin_input: str,
	        expected_output: Optional[str] = None,
	        language: Optional[str] = None,
	    ) -> "ExecutionResult":
	        """
	        Execute source code in the sandbox and judge its output.

	        Args:
	            code: Source text of the submission.
	            stdin_input: Text fed to the program on standard input.
	            expected_output: Output to compare against (whitespace-stripped);
	                when None, a clean exit alone counts as a pass.
	            language: Key of ``LANGUAGE_CONFIG``; defaults to the instance's
	                ``default_language``.

	        Returns:
	            An ``ExecutionResult``. Unsupported languages, ``RuntimeError``
	            and Docker failures are reported as ``status="error"`` results
	            rather than raised.
	        """
	        chosen_language = language or self.default_language
	        try:
	            config = self._validate_language(chosen_language)
	        except ValueError as exc:
	            return self._error_result(str(exc), expected_output, chosen_language)

	        files = {
	            config["source_name"]: code,
	            "stdin.txt": stdin_input,
	        }

	        try:
	            # One workspace for both steps: the binary written by the
	            # compile step must still exist when the run step starts.
	            with self._open_workspace() as workspace:
	                compile_cmd = config.get("compile_cmd")
	                if compile_cmd is not None:
	                    # NOTE(review): compilation is held to the same time and
	                    # memory limits as the run step - confirm that is wanted.
	                    compile_exit, _, compile_stderr, compile_time, compile_memory_mb, compile_timed_out = self._run_container(
	                        config["image"],
	                        compile_cmd,
	                        files,
	                        workspace=workspace,
	                    )
	                    if compile_timed_out:
	                        return ExecutionResult(
	                            status="tle",
	                            stdout="",
	                            stderr="Compilation timed out",
	                            timed_out=True,
	                            memory_exceeded=False,
	                            execution_time=compile_time,
	                            expected_output=expected_output,
	                            memory_used_mb=compile_memory_mb,
	                            language=chosen_language,
	                            outcome="time_limit",
	                        )
	                    if compile_exit != 0:
	                        return ExecutionResult(
	                            status="error",
	                            stdout="",
	                            stderr=compile_stderr or f"Compilation failed with exit code {compile_exit}",
	                            timed_out=False,
	                            memory_exceeded=compile_exit == 137,
	                            execution_time=compile_time,
	                            expected_output=expected_output,
	                            memory_used_mb=compile_memory_mb,
	                            language=chosen_language,
	                            outcome="compile_error",
	                            compile_error=True,
	                        )

	                exit_code, stdout, stderr, elapsed, peak_memory_mb, timed_out = self._run_container(
	                    config["image"],
	                    config["run_cmd"],
	                    files,
	                    workspace=workspace,
	                )

	            if timed_out or elapsed > self.time_limit:
	                return ExecutionResult(
	                    status="tle",
	                    stdout=stdout,
	                    stderr=stderr or "Time Limit Exceeded",
	                    timed_out=True,
	                    memory_exceeded=False,
	                    execution_time=elapsed,
	                    expected_output=expected_output,
	                    memory_used_mb=peak_memory_mb,
	                    language=chosen_language,
	                    outcome="time_limit",
	                )

	            status, outcome = self._status_from_exit(exit_code, expected_output, stdout, stderr)
	            return ExecutionResult(
	                status=status,
	                stdout=stdout,
	                stderr=stderr,
	                timed_out=False,
	                memory_exceeded=status == "mle",
	                execution_time=elapsed,
	                expected_output=expected_output,
	                memory_used_mb=peak_memory_mb,
	                language=chosen_language,
	                outcome=outcome,
	                compile_error=(outcome == "compile_error"),
	            )

	        except RuntimeError as exc:
	            return self._error_result(str(exc), expected_output, chosen_language)
	        except DockerException as exc:
	            return self._error_result(f"Docker execution failed: {exc}", expected_output, chosen_language)

	    def run_against_tests(
	        self,
	        code: str,
	        test_cases: list,
	        language: Optional[str] = None,
	    ) -> dict:
	        """
	        Run code against multiple test cases.

	        Each test case is a mapping with an ``"input"`` entry and an optional
	        ``"expected"`` entry.

	        Returns both existing CodeCourt keys and hackathon-facing aggregate fields:
	        - overall_status
	        - pass_rate
	        - avg_time
	        - avg_memory_mb
	        - outcome

	        The overall verdict is "pass" only when at least one test ran and all
	        passed; otherwise the first match of tle, compile error, mle, fail
	        wins. An empty ``test_cases`` therefore yields "fail"/"setter_wins"
	        (with ``pass_rate`` 0.0) instead of a vacuous pass.
	        """
	        results = []
	        passed = 0

	        for index, tc in enumerate(test_cases, start=1):
	            result = self.run(
	                code=code,
	                stdin_input=tc["input"],
	                expected_output=tc.get("expected"),
	                language=language,
	            )
	            results.append(
	                {
	                    "test_id": index,
	                    "status": result.status,
	                    "passed": result.passed,
	                    "execution_time": result.execution_time,
	                    "memory_used_mb": result.memory_used_mb,
	                    "stdout": result.stdout,
	                    "stderr": result.stderr,
	                    "outcome": result.outcome,
	                    "language": result.language,
	                }
	            )
	            if result.passed:
	                passed += 1

	        total = len(test_cases)
	        statuses = {item["status"] for item in results}
	        if total > 0 and passed == total:
	            overall_status, outcome = "pass", "solver_wins"
	        elif "tle" in statuses:
	            overall_status, outcome = "tle", "time_limit"
	        elif any(item["outcome"] == "compile_error" for item in results):
	            overall_status, outcome = "error", "compile_error"
	        elif "mle" in statuses:
	            overall_status, outcome = "mle", "setter_wins"
	        else:
	            overall_status, outcome = "fail", "setter_wins"

	        divisor = max(total, 1)  # avoids division by zero for an empty suite
	        return {
	            "overall_status": overall_status,
	            "outcome": outcome,
	            "passed": passed,
	            "total": total,
	            "pass_rate": passed / divisor,
	            "results": results,
	            "avg_time": sum(item["execution_time"] for item in results) / divisor,
	            "avg_memory_mb": sum(item["memory_used_mb"] for item in results) / divisor,
	        }