Spaces:
Paused
Paused
| """Singularity/Apptainer persistent container environment. | |
| Security-hardened with --containall, --no-home, capability dropping. | |
| Supports configurable resource limits and optional filesystem persistence | |
| via writable overlay directories that survive across sessions. | |
| """ | |
| import logging | |
| import os | |
| import shutil | |
| import subprocess | |
| import threading | |
| import uuid | |
| from pathlib import Path | |
| from typing import Optional | |
| from hermes_constants import get_hermes_home | |
| from tools.environments.base import ( | |
| BaseEnvironment, | |
| _load_json_store, | |
| _popen_bash, | |
| _save_json_store, | |
| ) | |
| logger = logging.getLogger(__name__) | |
| _SNAPSHOT_STORE = get_hermes_home() / "singularity_snapshots.json" | |
| def _find_singularity_executable() -> str: | |
| """Locate the apptainer or singularity CLI binary.""" | |
| if shutil.which("apptainer"): | |
| return "apptainer" | |
| if shutil.which("singularity"): | |
| return "singularity" | |
| raise RuntimeError( | |
| "Neither 'apptainer' nor 'singularity' was found in PATH. " | |
| "Install Apptainer (https://apptainer.org/docs/admin/main/installation.html) " | |
| "or Singularity and ensure the CLI is available." | |
| ) | |
| def _ensure_singularity_available() -> str: | |
| """Preflight check: resolve the executable and verify it responds.""" | |
| exe = _find_singularity_executable() | |
| try: | |
| result = subprocess.run( | |
| [exe, "version"], capture_output=True, text=True, timeout=10, | |
| ) | |
| except FileNotFoundError: | |
| raise RuntimeError( | |
| f"Singularity backend selected but '{exe}' could not be executed." | |
| ) | |
| except subprocess.TimeoutExpired: | |
| raise RuntimeError(f"'{exe} version' timed out.") | |
| if result.returncode != 0: | |
| stderr = result.stderr.strip()[:200] | |
| raise RuntimeError(f"'{exe} version' failed (exit code {result.returncode}): {stderr}") | |
| return exe | |
| def _load_snapshots() -> dict: | |
| return _load_json_store(_SNAPSHOT_STORE) | |
| def _save_snapshots(data: dict) -> None: | |
| _save_json_store(_SNAPSHOT_STORE, data) | |
| def _get_scratch_dir() -> Path: | |
| custom_scratch = os.getenv("TERMINAL_SCRATCH_DIR") | |
| if custom_scratch: | |
| scratch_path = Path(custom_scratch) | |
| scratch_path.mkdir(parents=True, exist_ok=True) | |
| return scratch_path | |
| from tools.environments.base import get_sandbox_dir | |
| sandbox = get_sandbox_dir() / "singularity" | |
| scratch = Path("/scratch") | |
| if scratch.exists() and os.access(scratch, os.W_OK): | |
| user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent" | |
| user_scratch.mkdir(parents=True, exist_ok=True) | |
| logger.info("Using /scratch for sandboxes: %s", user_scratch) | |
| return user_scratch | |
| sandbox.mkdir(parents=True, exist_ok=True) | |
| return sandbox | |
| def _get_apptainer_cache_dir() -> Path: | |
| cache_dir = os.getenv("APPTAINER_CACHEDIR") | |
| if cache_dir: | |
| cache_path = Path(cache_dir) | |
| cache_path.mkdir(parents=True, exist_ok=True) | |
| return cache_path | |
| scratch = _get_scratch_dir() | |
| cache_path = scratch / ".apptainer" | |
| cache_path.mkdir(parents=True, exist_ok=True) | |
| return cache_path | |
| _sif_build_lock = threading.Lock() | |
| def _get_or_build_sif(image: str, executable: str = "apptainer") -> str: | |
| if image.endswith('.sif') and Path(image).exists(): | |
| return image | |
| if not image.startswith('docker://'): | |
| return image | |
| image_name = image.replace('docker://', '').replace('/', '-').replace(':', '-') | |
| cache_dir = _get_apptainer_cache_dir() | |
| sif_path = cache_dir / f"{image_name}.sif" | |
| if sif_path.exists(): | |
| return str(sif_path) | |
| with _sif_build_lock: | |
| if sif_path.exists(): | |
| return str(sif_path) | |
| logger.info("Building SIF image (one-time setup)...") | |
| logger.info(" Source: %s", image) | |
| logger.info(" Target: %s", sif_path) | |
| tmp_dir = cache_dir / "tmp" | |
| tmp_dir.mkdir(parents=True, exist_ok=True) | |
| env = os.environ.copy() | |
| env["APPTAINER_TMPDIR"] = str(tmp_dir) | |
| env["APPTAINER_CACHEDIR"] = str(cache_dir) | |
| try: | |
| result = subprocess.run( | |
| [executable, "build", str(sif_path), image], | |
| capture_output=True, text=True, timeout=600, env=env, | |
| ) | |
| if result.returncode != 0: | |
| logger.warning("SIF build failed, falling back to docker:// URL") | |
| logger.warning(" Error: %s", result.stderr[:500]) | |
| return image | |
| logger.info("SIF image built successfully") | |
| return str(sif_path) | |
| except subprocess.TimeoutExpired: | |
| logger.warning("SIF build timed out, falling back to docker:// URL") | |
| if sif_path.exists(): | |
| sif_path.unlink() | |
| return image | |
| except Exception as e: | |
| logger.warning("SIF build error: %s, falling back to docker:// URL", e) | |
| return image | |
| class SingularityEnvironment(BaseEnvironment): | |
| """Hardened Singularity/Apptainer container with resource limits and persistence. | |
| Spawn-per-call: every execute() spawns a fresh ``apptainer exec ... bash -c`` process. | |
| Session snapshot preserves env vars across calls. | |
| CWD persists via in-band stdout markers. | |
| """ | |
| def __init__( | |
| self, | |
| image: str, | |
| cwd: str = "~", | |
| timeout: int = 60, | |
| cpu: float = 0, | |
| memory: int = 0, | |
| disk: int = 0, | |
| persistent_filesystem: bool = False, | |
| task_id: str = "default", | |
| ): | |
| super().__init__(cwd=cwd, timeout=timeout) | |
| self.executable = _ensure_singularity_available() | |
| self.image = _get_or_build_sif(image, self.executable) | |
| self.instance_id = f"hermes_{uuid.uuid4().hex[:12]}" | |
| self._instance_started = False | |
| self._persistent = persistent_filesystem | |
| self._task_id = task_id | |
| self._overlay_dir: Optional[Path] = None | |
| self._cpu = cpu | |
| self._memory = memory | |
| if self._persistent: | |
| overlay_base = _get_scratch_dir() / "hermes-overlays" | |
| overlay_base.mkdir(parents=True, exist_ok=True) | |
| self._overlay_dir = overlay_base / f"overlay-{task_id}" | |
| self._overlay_dir.mkdir(parents=True, exist_ok=True) | |
| self._start_instance() | |
| self.init_session() | |
| def _start_instance(self): | |
| cmd = [self.executable, "instance", "start"] | |
| cmd.extend(["--containall", "--no-home"]) | |
| if self._persistent and self._overlay_dir: | |
| cmd.extend(["--overlay", str(self._overlay_dir)]) | |
| else: | |
| cmd.append("--writable-tmpfs") | |
| try: | |
| from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount | |
| for mount_entry in get_credential_file_mounts(): | |
| cmd.extend(["--bind", f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro"]) | |
| for skills_mount in get_skills_directory_mount(): | |
| cmd.extend(["--bind", f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro"]) | |
| except Exception as e: | |
| logger.debug("Singularity: could not load credential/skills mounts: %s", e) | |
| if self._memory > 0: | |
| cmd.extend(["--memory", f"{self._memory}M"]) | |
| if self._cpu > 0: | |
| cmd.extend(["--cpus", str(self._cpu)]) | |
| cmd.extend([str(self.image), self.instance_id]) | |
| try: | |
| result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) | |
| if result.returncode != 0: | |
| raise RuntimeError(f"Failed to start instance: {result.stderr}") | |
| self._instance_started = True | |
| logger.info("Singularity instance %s started (persistent=%s)", | |
| self.instance_id, self._persistent) | |
| except subprocess.TimeoutExpired: | |
| raise RuntimeError("Instance start timed out") | |
| def _run_bash(self, cmd_string: str, *, login: bool = False, | |
| timeout: int = 120, | |
| stdin_data: str | None = None) -> subprocess.Popen: | |
| """Spawn a bash process inside the Singularity instance.""" | |
| if not self._instance_started: | |
| raise RuntimeError("Singularity instance not started") | |
| cmd = [self.executable, "exec", | |
| f"instance://{self.instance_id}"] | |
| if login: | |
| cmd.extend(["bash", "-l", "-c", cmd_string]) | |
| else: | |
| cmd.extend(["bash", "-c", cmd_string]) | |
| return _popen_bash(cmd, stdin_data) | |
| def cleanup(self): | |
| """Stop the instance. If persistent, the overlay dir survives.""" | |
| if self._instance_started: | |
| try: | |
| subprocess.run( | |
| [self.executable, "instance", "stop", self.instance_id], | |
| capture_output=True, text=True, timeout=30, | |
| ) | |
| logger.info("Singularity instance %s stopped", self.instance_id) | |
| except Exception as e: | |
| logger.warning("Failed to stop Singularity instance %s: %s", self.instance_id, e) | |
| self._instance_started = False | |
| if self._persistent and self._overlay_dir: | |
| snapshots = _load_snapshots() | |
| snapshots[self._task_id] = str(self._overlay_dir) | |
| _save_snapshots(snapshots) | |