"""Resource caps and container security options for the sandbox. Centralised here so every reward evaluation uses the same limits and there's one place to audit/change them. """ from __future__ import annotations from typing import Any # Defaults — can be overridden per call at the runner API. DEFAULT_TIMEOUT_S: float = 5.0 DEFAULT_MEM_MB: int = 256 # Grace period beyond timeout_s before forcibly killing the container. WAIT_BUFFER_S: float = 2.0 # Report is written to the bind-mounted /work (not tmpfs /tmp), so it # survives container exit and can be read from the host side directly. REPORT_FILENAME: str = "report.json" REPORT_PATH_IN_CONTAINER: str = f"/work/{REPORT_FILENAME}" # Image tag. Build with `make build-sandbox`. SANDBOX_IMAGE: str = "vrl-sandbox:latest" def container_kwargs(mem_mb: int) -> dict[str, Any]: """Docker SDK kwargs that harden the container against untrusted code. Any change here affects the entire RL reward signal — audit carefully. """ return { # --- isolation --- "network_mode": "none", "read_only": True, "cap_drop": ["ALL"], "security_opt": ["no-new-privileges"], # --- resource caps --- "mem_limit": f"{mem_mb}m", "memswap_limit": f"{mem_mb}m", # no swap escape "nano_cpus": 1_000_000_000, # 1 CPU "pids_limit": 64, # Writable tmpfs for /tmp (scratch only — tmpfs is destroyed on exit). "tmpfs": {"/tmp": "rw,size=64m,mode=1777,nosuid,nodev"}, # Disable .pyc writes. The container runs as UID 1000, but on Linux # CI the host runs as a different UID, so pycache files written to # the bind-mounted /work would be un-deletable by the host cleanup. "environment": {"PYTHONDONTWRITEBYTECODE": "1"}, }