Spaces:

abrown31
/

open-range

Runtime error

App Files Files Community

Lars Talian committed on Mar 8

Commit

016a288

2 Parent(s): 8fcf8b7 6f0f018

Merge remote-tracking branch 'origin/main' into codex/issue-79-81-20260308

Browse files

Files changed (6) hide show

Dockerfile +4 -0
src/open_range/builder/service_manifest.py +13 -2
src/open_range/server/environment.py +53 -7
src/open_range/validator/exploitability.py +28 -7
tests/test_builder.py +0 -16
tests/test_validator.py +22 -4

Dockerfile CHANGED Viewed

@@ -91,6 +91,10 @@ ENV OPENRANGE_SNAPSHOT_POOL_SIZE=1
 # Enable the OpenEnv Gradio web interface at /web
 ENV ENABLE_WEB_INTERFACE=true
 HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

 # Enable the OpenEnv Gradio web interface at /web
 ENV ENABLE_WEB_INTERFACE=true
+# Clear any pre-existing snapshots so runtime always generates fresh ones
+# with current service specs from service_manifest.py
+RUN rm -rf /app/env/snapshots/* 2>/dev/null || true
 HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

src/open_range/builder/service_manifest.py CHANGED Viewed

@@ -56,6 +56,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
         [
             "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
             "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
         ],
         "mysqld --user=mysql --log-error={log_dir}/mysql.log &",
         ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
@@ -66,6 +68,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
         [
             "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
             "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
         ],
         "mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
         ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
@@ -100,7 +104,10 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
     "rsyslog": (
         "rsyslogd",
         ["rsyslog"],
-        [],
         "rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
         ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
     ),
@@ -118,7 +125,11 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
     "postfix": (
         "master",
         ["postfix"],
-        [],
         "postfix start > {log_dir}/postfix.log 2>&1 || true",
         ReadinessCheck(type="tcp", port=25, timeout_s=10),
     ),

         [
             "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
             "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
+            # Ensure data directory is initialized (idempotent)
+            "test -d /var/lib/mysql/mysql || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
         ],
         "mysqld --user=mysql --log-error={log_dir}/mysql.log &",
         ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
         [
             "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
             "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
+            # Ensure data directory is initialized (idempotent)
+            "test -d /var/lib/mysql/mysql || mariadb-install-db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
         ],
         "mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
         ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
     "rsyslog": (
         "rsyslogd",
         ["rsyslog"],
+        [
+            # Disable imklog (kernel log) — not available in containers
+            "sed -i '/imklog/s/^/#/' /etc/rsyslog.conf 2>/dev/null || true",
+        ],
         "rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
         ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
     ),
     "postfix": (
         "master",
         ["postfix"],
+        [
+            # Ensure aliases DB exists and fix chroot dirs
+            "newaliases 2>/dev/null || true",
+            "mkdir -p /var/spool/postfix/pid 2>/dev/null || true",
+        ],
         "postfix start > {log_dir}/postfix.log 2>&1 || true",
         ReadinessCheck(type="tcp", port=25, timeout_s=10),
     ),

src/open_range/server/environment.py CHANGED Viewed

@@ -15,6 +15,7 @@ from __future__ import annotations
 import logging
 import os
 import re
 import shlex
 import socket
 import subprocess as sp
@@ -23,6 +24,30 @@ import urllib.request
 from typing import TYPE_CHECKING, Any
 from uuid import uuid4
 from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import EnvironmentMetadata
@@ -543,8 +568,8 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
     def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
         """Start services based on snapshot spec (subprocess mode only).
-        The snapshot's ``services`` list is normally populated by the Renderer.
-        Older snapshots fall back to topology-derived service specs.
         """
         if self._execution_mode != "subprocess":
             return
@@ -615,7 +640,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             else svc.start_command
         )
-        # Run init commands
         for cmd in init_commands:
             try:
                 result = sp.run(
@@ -625,6 +650,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
                     text=True,
                     env=env,
                     check=False,
                 )
                 if result.returncode != 0 and result.stderr:
                     logger.debug(
@@ -634,15 +660,20 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             except Exception as exc:
                 logger.warning("Init command failed for %s: %s", svc.daemon, exc)
-        # Start the daemon
         try:
             result = sp.run(
-                ["bash", "-c", start_command],
                 capture_output=True,
                 timeout=30,
                 text=True,
                 env=env,
                 check=False,
             )
             if result.returncode != 0 and result.stderr:
                 logger.debug(
@@ -859,6 +890,15 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             except Exception as exc:
                 logger.debug("NPC traffic log refresh failed: %s", exc)
     # -----------------------------------------------------------------
     # Snapshot selection
     # -----------------------------------------------------------------
@@ -1286,8 +1326,9 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
         self._episode_start = time.time()
         self._episode_recorded = False
         try:
-            from open_range.server.console import clear_history
             clear_history()
         except Exception:
             pass
@@ -1338,6 +1379,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             len(self._snapshot.golden_path or []),
         )
         return RangeObservation(stdout=briefing)
     def step(
@@ -1378,11 +1420,13 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
         cmd_name = _extract_command_name(action.command)
         if not cmd_name:
-            return RangeObservation(
                 stdout="",
                 stderr="Empty command",
                 done=self._state.step_count >= self._max_steps,
             )
         # Handle meta-commands (processed by environment, not forwarded to containers)
         meta_handlers = {
@@ -1398,6 +1442,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             obs = self._apply_rewards(action, obs)
             self._check_termination(obs)
             self._report_if_done(obs)
             return obs
         # Route to container
@@ -1453,6 +1498,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
         self._check_termination(obs)
         self._report_if_done(obs)
         return obs
     @property

 import logging
 import os
 import re
+import signal
 import shlex
 import socket
 import subprocess as sp
 from typing import TYPE_CHECKING, Any
 from uuid import uuid4
+def _install_zombie_reaper() -> None:
+    """Install SIGCHLD handler to reap orphaned child processes.
+    When Python runs as PID 1 (e.g. in Docker containers), it doesn't
+    automatically reap zombie children.  This handler ensures service
+    daemons started via subprocess don't accumulate as zombies.
+    """
+    def _reap_children(signum: int, frame: Any) -> None:
+        while True:
+            try:
+                pid, _ = os.waitpid(-1, os.WNOHANG)
+                if pid == 0:
+                    break
+            except ChildProcessError:
+                break
+    signal.signal(signal.SIGCHLD, _reap_children)
+# Install at import time so it's active before any service starts
+if os.getpid() == 1:
+    _install_zombie_reaper()
 from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import EnvironmentMetadata
     def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
         """Start services based on snapshot spec (subprocess mode only).
+        The snapshot's ``services`` list is normally populated by the renderer.
+        Snapshots without explicit service specs skip subprocess provisioning.
         """
         if self._execution_mode != "subprocess":
             return
             else svc.start_command
         )
+        # Run init commands (isolated from PID 1's process group)
         for cmd in init_commands:
             try:
                 result = sp.run(
                     text=True,
                     env=env,
                     check=False,
+                    start_new_session=True,
                 )
                 if result.returncode != 0 and result.stderr:
                     logger.debug(
             except Exception as exc:
                 logger.warning("Init command failed for %s: %s", svc.daemon, exc)
+        # Start the daemon in a new session so it cannot send signals to
+        # PID 1 (uvicorn).  Ensure the command is backgrounded.
+        effective_cmd = start_command
+        if not effective_cmd.rstrip().endswith("&"):
+            effective_cmd = f"({effective_cmd}) &"
         try:
             result = sp.run(
+                ["bash", "-c", effective_cmd],
                 capture_output=True,
                 timeout=30,
                 text=True,
                 env=env,
                 check=False,
+                start_new_session=True,
             )
             if result.returncode != 0 and result.stderr:
                 logger.debug(
             except Exception as exc:
                 logger.debug("NPC traffic log refresh failed: %s", exc)
+    def _publish_console_state(self) -> None:
+        """Publish the latest snapshot/state to the operator console."""
+        try:
+            from open_range.server.console import publish_episode
+            publish_episode(self._snapshot, self._state)
+        except Exception:
+            pass
     # -----------------------------------------------------------------
     # Snapshot selection
     # -----------------------------------------------------------------
         self._episode_start = time.time()
         self._episode_recorded = False
         try:
+            from open_range.server.console import clear_episode, clear_history
+            clear_episode()
             clear_history()
         except Exception:
             pass
             len(self._snapshot.golden_path or []),
         )
+        self._publish_console_state()
         return RangeObservation(stdout=briefing)
     def step(
         cmd_name = _extract_command_name(action.command)
         if not cmd_name:
+            obs = RangeObservation(
                 stdout="",
                 stderr="Empty command",
                 done=self._state.step_count >= self._max_steps,
             )
+            self._publish_console_state()
+            return obs
         # Handle meta-commands (processed by environment, not forwarded to containers)
         meta_handlers = {
             obs = self._apply_rewards(action, obs)
             self._check_termination(obs)
             self._report_if_done(obs)
+            self._publish_console_state()
             return obs
         # Route to container
         self._check_termination(obs)
         self._report_if_done(obs)
+        self._publish_console_state()
         return obs
     @property

src/open_range/validator/exploitability.py CHANGED Viewed

@@ -15,6 +15,16 @@ _META_COMMANDS = {"submit_flag", "submit_evidence", "submit_finding", "auth", "l
 class ExploitabilityCheck:
     """Execute every golden-path step and verify ``expect_in_stdout`` appears."""
     async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
         if not snapshot.golden_path:
             return CheckResult(
@@ -43,12 +53,20 @@ class ExploitabilityCheck:
             expected = step.expect_in_stdout
             if not expected:
-                logger.warning(
-                    "exploitability: golden path step %d has no expect_in_stdout — "
-                    "output not validated",
-                    step.step,
                 )
-                unvalidated_steps.append(step.step)
             elif expected not in output:
                 failed_steps.append({
                     "step": step.step,
@@ -56,7 +74,9 @@ class ExploitabilityCheck:
                     "got_snippet": output[:300],
                 })
-        passed = len(failed_steps) == 0 and len(unvalidated_steps) == 0
         issues: list[str] = []
         if unvalidated_steps:
             issues.append(
@@ -65,7 +85,7 @@ class ExploitabilityCheck:
         error_parts: list[str] = []
         if failed_steps:
             error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
-        if unvalidated_steps:
             error_parts.append(
                 f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
             )
@@ -78,6 +98,7 @@ class ExploitabilityCheck:
                 "unvalidated_steps": unvalidated_steps,
                 "issues": issues,
                 "total_steps": len(snapshot.golden_path),
             },
             error="" if passed else "; ".join(error_parts),
         )

 class ExploitabilityCheck:
     """Execute every golden-path step and verify ``expect_in_stdout`` appears."""
+    def __init__(self, *, require_expectation: bool = True) -> None:
+        """Create an exploitability check.
+        Args:
+            require_expectation: When ``True`` (default), every non-meta golden
+                path step must define ``expect_in_stdout``. Missing expectations
+                are treated as validation failures.
+        """
+        self.require_expectation = require_expectation
     async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
         if not snapshot.golden_path:
             return CheckResult(
             expected = step.expect_in_stdout
             if not expected:
+                message = (
+                    f"golden path step {step.step} has no expect_in_stdout"
                 )
+                if self.require_expectation:
+                    failed_steps.append({
+                        "step": step.step,
+                        "error": message,
+                    })
+                else:
+                    logger.warning(
+                        "exploitability: %s — output not validated",
+                        message,
+                    )
+                    unvalidated_steps.append(step.step)
             elif expected not in output:
                 failed_steps.append({
                     "step": step.step,
                     "got_snippet": output[:300],
                 })
+        passed = len(failed_steps) == 0 and (
+            not self.require_expectation or len(unvalidated_steps) == 0
+        )
         issues: list[str] = []
         if unvalidated_steps:
             issues.append(
         error_parts: list[str] = []
         if failed_steps:
             error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
+        if self.require_expectation and unvalidated_steps:
             error_parts.append(
                 f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
             )
                 "unvalidated_steps": unvalidated_steps,
                 "issues": issues,
                 "total_steps": len(snapshot.golden_path),
+                "require_expectation": self.require_expectation,
             },
             error="" if passed else "; ".join(error_parts),
         )

tests/test_builder.py CHANGED Viewed

@@ -109,22 +109,6 @@ async def test_template_builder_empty_bug_families_uses_default_pool(tier1_manif
     assert len(spec.truth_graph.vulns) == 1
-@pytest.mark.asyncio
-async def test_template_builder_handles_non_schema_difficulty_bounds(tier1_manifest):
-    from open_range.builder.builder import TemplateOnlyBuilder
-    builder = TemplateOnlyBuilder()
-    manifest = {
-        **tier1_manifest,
-        "bug_families": ["sqli"],
-        "difficulty": {**tier1_manifest.get("difficulty", {}), "min_vulns": -2, "max_vulns": 0},
-    }
-    spec = await builder.build(manifest, BuildContext(seed=9, tier=1))
-    assert len(spec.truth_graph.vulns) == 1
-    assert spec.truth_graph.vulns[0].type == "sqli"
 @pytest.mark.asyncio
 async def test_template_builder_avoids_previous_vulns(tier1_manifest):
     from open_range.builder.builder import TemplateOnlyBuilder

     assert len(spec.truth_graph.vulns) == 1
 @pytest.mark.asyncio
 async def test_template_builder_avoids_previous_vulns(tier1_manifest):
     from open_range.builder.builder import TemplateOnlyBuilder

tests/test_validator.py CHANGED Viewed

@@ -336,8 +336,7 @@ async def test_exploitability_skips_meta_commands(mock_containers):
     assert result.details["skipped_steps"] == [2]
-@pytest.mark.asyncio
-async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_containers):
     from open_range.validator.exploitability import ExploitabilityCheck
     spec = SnapshotSpec(
@@ -345,12 +344,31 @@ async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_c
             GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
         ],
     )
-    mock_containers.exec_results[("attacker", "curl http://web/")] = "ok"
     result = await ExploitabilityCheck().check(spec, mock_containers)
     assert result.passed is False
     assert result.details["unvalidated_steps"] == [1]
-    assert "missing expect_in_stdout" in result.error
 # ---------------------------------------------------------------------------

     assert result.details["skipped_steps"] == [2]
+async def test_exploitability_fails_when_expectation_missing_in_strict_mode(mock_containers):
     from open_range.validator.exploitability import ExploitabilityCheck
     spec = SnapshotSpec(
             GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
         ],
     )
+    mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
     result = await ExploitabilityCheck().check(spec, mock_containers)
     assert result.passed is False
+    assert result.details["require_expectation"] is True
+    assert result.details["failed_steps"][0]["error"] == (
+        "golden path step 1 has no expect_in_stdout"
+    )
+@pytest.mark.asyncio
+async def test_exploitability_allows_missing_expectation_in_lenient_mode(mock_containers):
+    from open_range.validator.exploitability import ExploitabilityCheck
+    spec = SnapshotSpec(
+        golden_path=[
+            GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
+        ],
+    )
+    mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
+    result = await ExploitabilityCheck(require_expectation=False).check(spec, mock_containers)
+    assert result.passed is True
+    assert result.details["require_expectation"] is False
     assert result.details["unvalidated_steps"] == [1]
 # ---------------------------------------------------------------------------