Spaces:
Runtime error
Runtime error
Merge remote-tracking branch 'origin/main' into codex/issue-79-81-20260308
Browse files
- Dockerfile +4 -0
- src/open_range/builder/service_manifest.py +13 -2
- src/open_range/server/environment.py +53 -7
- src/open_range/validator/exploitability.py +28 -7
- tests/test_builder.py +0 -16
- tests/test_validator.py +22 -4
Dockerfile
CHANGED
|
@@ -91,6 +91,10 @@ ENV OPENRANGE_SNAPSHOT_POOL_SIZE=1
|
|
| 91 |
# Enable the OpenEnv Gradio web interface at /web
|
| 92 |
ENV ENABLE_WEB_INTERFACE=true
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
|
| 95 |
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 96 |
|
|
|
|
| 91 |
# Enable the OpenEnv Gradio web interface at /web
|
| 92 |
ENV ENABLE_WEB_INTERFACE=true
|
| 93 |
|
| 94 |
+
# Clear any pre-existing snapshots so runtime always generates fresh ones
|
| 95 |
+
# with current service specs from service_manifest.py
|
| 96 |
+
RUN rm -rf /app/env/snapshots/* 2>/dev/null || true
|
| 97 |
+
|
| 98 |
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
|
| 99 |
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 100 |
|
src/open_range/builder/service_manifest.py
CHANGED
|
@@ -56,6 +56,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
|
|
| 56 |
[
|
| 57 |
"mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
|
| 58 |
"mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
|
|
|
|
|
|
|
| 59 |
],
|
| 60 |
"mysqld --user=mysql --log-error={log_dir}/mysql.log &",
|
| 61 |
ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
|
|
@@ -66,6 +68,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
|
|
| 66 |
[
|
| 67 |
"mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
|
| 68 |
"mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
|
|
|
|
|
|
|
| 69 |
],
|
| 70 |
"mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
|
| 71 |
ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
|
|
@@ -100,7 +104,10 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
|
|
| 100 |
"rsyslog": (
|
| 101 |
"rsyslogd",
|
| 102 |
["rsyslog"],
|
| 103 |
-
[
|
|
|
|
|
|
|
|
|
|
| 104 |
"rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
|
| 105 |
ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
|
| 106 |
),
|
|
@@ -118,7 +125,11 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
|
|
| 118 |
"postfix": (
|
| 119 |
"master",
|
| 120 |
["postfix"],
|
| 121 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
"postfix start > {log_dir}/postfix.log 2>&1 || true",
|
| 123 |
ReadinessCheck(type="tcp", port=25, timeout_s=10),
|
| 124 |
),
|
|
|
|
| 56 |
[
|
| 57 |
"mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
|
| 58 |
"mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
|
| 59 |
+
# Ensure data directory is initialized (idempotent)
|
| 60 |
+
"test -d /var/lib/mysql/mysql || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
|
| 61 |
],
|
| 62 |
"mysqld --user=mysql --log-error={log_dir}/mysql.log &",
|
| 63 |
ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
|
|
|
|
| 68 |
[
|
| 69 |
"mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
|
| 70 |
"mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
|
| 71 |
+
# Ensure data directory is initialized (idempotent)
|
| 72 |
+
"test -d /var/lib/mysql/mysql || mariadb-install-db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
|
| 73 |
],
|
| 74 |
"mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
|
| 75 |
ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
|
|
|
|
| 104 |
"rsyslog": (
|
| 105 |
"rsyslogd",
|
| 106 |
["rsyslog"],
|
| 107 |
+
[
|
| 108 |
+
# Disable imklog (kernel log) — not available in containers
|
| 109 |
+
"sed -i '/imklog/s/^/#/' /etc/rsyslog.conf 2>/dev/null || true",
|
| 110 |
+
],
|
| 111 |
"rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
|
| 112 |
ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
|
| 113 |
),
|
|
|
|
| 125 |
"postfix": (
|
| 126 |
"master",
|
| 127 |
["postfix"],
|
| 128 |
+
[
|
| 129 |
+
# Ensure aliases DB exists and fix chroot dirs
|
| 130 |
+
"newaliases 2>/dev/null || true",
|
| 131 |
+
"mkdir -p /var/spool/postfix/pid 2>/dev/null || true",
|
| 132 |
+
],
|
| 133 |
"postfix start > {log_dir}/postfix.log 2>&1 || true",
|
| 134 |
ReadinessCheck(type="tcp", port=25, timeout_s=10),
|
| 135 |
),
|
src/open_range/server/environment.py
CHANGED
|
@@ -15,6 +15,7 @@ from __future__ import annotations
|
|
| 15 |
import logging
|
| 16 |
import os
|
| 17 |
import re
|
|
|
|
| 18 |
import shlex
|
| 19 |
import socket
|
| 20 |
import subprocess as sp
|
|
@@ -23,6 +24,30 @@ import urllib.request
|
|
| 23 |
from typing import TYPE_CHECKING, Any
|
| 24 |
from uuid import uuid4
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
from openenv.core.env_server.interfaces import Environment
|
| 27 |
from openenv.core.env_server.types import EnvironmentMetadata
|
| 28 |
|
|
@@ -543,8 +568,8 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 543 |
def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
|
| 544 |
"""Start services based on snapshot spec (subprocess mode only).
|
| 545 |
|
| 546 |
-
The snapshot's ``services`` list is normally populated by the
|
| 547 |
-
|
| 548 |
"""
|
| 549 |
if self._execution_mode != "subprocess":
|
| 550 |
return
|
|
@@ -615,7 +640,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 615 |
else svc.start_command
|
| 616 |
)
|
| 617 |
|
| 618 |
-
# Run init commands
|
| 619 |
for cmd in init_commands:
|
| 620 |
try:
|
| 621 |
result = sp.run(
|
|
@@ -625,6 +650,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 625 |
text=True,
|
| 626 |
env=env,
|
| 627 |
check=False,
|
|
|
|
| 628 |
)
|
| 629 |
if result.returncode != 0 and result.stderr:
|
| 630 |
logger.debug(
|
|
@@ -634,15 +660,20 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 634 |
except Exception as exc:
|
| 635 |
logger.warning("Init command failed for %s: %s", svc.daemon, exc)
|
| 636 |
|
| 637 |
-
# Start the daemon
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
try:
|
| 639 |
result = sp.run(
|
| 640 |
-
["bash", "-c",
|
| 641 |
capture_output=True,
|
| 642 |
timeout=30,
|
| 643 |
text=True,
|
| 644 |
env=env,
|
| 645 |
check=False,
|
|
|
|
| 646 |
)
|
| 647 |
if result.returncode != 0 and result.stderr:
|
| 648 |
logger.debug(
|
|
@@ -859,6 +890,15 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 859 |
except Exception as exc:
|
| 860 |
logger.debug("NPC traffic log refresh failed: %s", exc)
|
| 861 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
# -----------------------------------------------------------------
|
| 863 |
# Snapshot selection
|
| 864 |
# -----------------------------------------------------------------
|
|
@@ -1286,8 +1326,9 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 1286 |
self._episode_start = time.time()
|
| 1287 |
self._episode_recorded = False
|
| 1288 |
try:
|
| 1289 |
-
from open_range.server.console import clear_history
|
| 1290 |
|
|
|
|
| 1291 |
clear_history()
|
| 1292 |
except Exception:
|
| 1293 |
pass
|
|
@@ -1338,6 +1379,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 1338 |
len(self._snapshot.golden_path or []),
|
| 1339 |
)
|
| 1340 |
|
|
|
|
| 1341 |
return RangeObservation(stdout=briefing)
|
| 1342 |
|
| 1343 |
def step(
|
|
@@ -1378,11 +1420,13 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 1378 |
|
| 1379 |
cmd_name = _extract_command_name(action.command)
|
| 1380 |
if not cmd_name:
|
| 1381 |
-
|
| 1382 |
stdout="",
|
| 1383 |
stderr="Empty command",
|
| 1384 |
done=self._state.step_count >= self._max_steps,
|
| 1385 |
)
|
|
|
|
|
|
|
| 1386 |
|
| 1387 |
# Handle meta-commands (processed by environment, not forwarded to containers)
|
| 1388 |
meta_handlers = {
|
|
@@ -1398,6 +1442,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 1398 |
obs = self._apply_rewards(action, obs)
|
| 1399 |
self._check_termination(obs)
|
| 1400 |
self._report_if_done(obs)
|
|
|
|
| 1401 |
return obs
|
| 1402 |
|
| 1403 |
# Route to container
|
|
@@ -1453,6 +1498,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
|
|
| 1453 |
self._check_termination(obs)
|
| 1454 |
self._report_if_done(obs)
|
| 1455 |
|
|
|
|
| 1456 |
return obs
|
| 1457 |
|
| 1458 |
@property
|
|
|
|
| 15 |
import logging
|
| 16 |
import os
|
| 17 |
import re
|
| 18 |
+
import signal
|
| 19 |
import shlex
|
| 20 |
import socket
|
| 21 |
import subprocess as sp
|
|
|
|
| 24 |
from typing import TYPE_CHECKING, Any
|
| 25 |
from uuid import uuid4
|
| 26 |
|
| 27 |
+
|
| 28 |
+
def _install_zombie_reaper() -> None:
    """Install a SIGCHLD handler that reaps exited child processes.

    When Python runs as PID 1 (e.g. in Docker containers), it doesn't
    automatically reap zombie children. This handler ensures service
    daemons started via subprocess don't accumulate as zombies.

    Installation is best-effort. ``signal.signal`` may only be called from
    the main thread of the main interpreter and raises ``ValueError``
    elsewhere; in that case the failure is logged at debug level and no
    handler is installed. On platforms without ``SIGCHLD`` this is a no-op.
    """
    sigchld = getattr(signal, "SIGCHLD", None)
    if sigchld is None:
        # No SIGCHLD on this platform (e.g. Windows): nothing to install.
        return

    def _reap_children(signum: int, frame: Any) -> None:
        # One delivered signal can stand for several exited children, so
        # keep collecting until nothing more is immediately reapable.
        # NOTE(review): waitpid(-1) collects *any* child, which may include
        # one that subprocess.run() is itself waiting on -- confirm callers
        # do not rely on exact return codes of those children.
        while True:
            try:
                pid, _ = os.waitpid(-1, os.WNOHANG)
            except ChildProcessError:
                # No child processes exist at all.
                return
            if pid == 0:
                # Children exist, but none has exited yet.
                return

    try:
        signal.signal(sigchld, _reap_children)
    except ValueError as exc:
        # Raised when called outside the main thread; do not let that
        # abort the import of this module.
        logging.getLogger(__name__).debug(
            "Could not install SIGCHLD zombie reaper: %s", exc
        )


# Install at import time so it's active before any service starts
if os.getpid() == 1:
    _install_zombie_reaper()
|
| 50 |
+
|
| 51 |
from openenv.core.env_server.interfaces import Environment
|
| 52 |
from openenv.core.env_server.types import EnvironmentMetadata
|
| 53 |
|
|
|
|
| 568 |
def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
|
| 569 |
"""Start services based on snapshot spec (subprocess mode only).
|
| 570 |
|
| 571 |
+
The snapshot's ``services`` list is normally populated by the renderer.
|
| 572 |
+
Snapshots without explicit service specs skip subprocess provisioning.
|
| 573 |
"""
|
| 574 |
if self._execution_mode != "subprocess":
|
| 575 |
return
|
|
|
|
| 640 |
else svc.start_command
|
| 641 |
)
|
| 642 |
|
| 643 |
+
# Run init commands (isolated from PID 1's process group)
|
| 644 |
for cmd in init_commands:
|
| 645 |
try:
|
| 646 |
result = sp.run(
|
|
|
|
| 650 |
text=True,
|
| 651 |
env=env,
|
| 652 |
check=False,
|
| 653 |
+
start_new_session=True,
|
| 654 |
)
|
| 655 |
if result.returncode != 0 and result.stderr:
|
| 656 |
logger.debug(
|
|
|
|
| 660 |
except Exception as exc:
|
| 661 |
logger.warning("Init command failed for %s: %s", svc.daemon, exc)
|
| 662 |
|
| 663 |
+
# Start the daemon in a new session so it cannot send signals to
|
| 664 |
+
# PID 1 (uvicorn). Ensure the command is backgrounded.
|
| 665 |
+
effective_cmd = start_command
|
| 666 |
+
if not effective_cmd.rstrip().endswith("&"):
|
| 667 |
+
effective_cmd = f"({effective_cmd}) &"
|
| 668 |
try:
|
| 669 |
result = sp.run(
|
| 670 |
+
["bash", "-c", effective_cmd],
|
| 671 |
capture_output=True,
|
| 672 |
timeout=30,
|
| 673 |
text=True,
|
| 674 |
env=env,
|
| 675 |
check=False,
|
| 676 |
+
start_new_session=True,
|
| 677 |
)
|
| 678 |
if result.returncode != 0 and result.stderr:
|
| 679 |
logger.debug(
|
|
|
|
| 890 |
except Exception as exc:
|
| 891 |
logger.debug("NPC traffic log refresh failed: %s", exc)
|
| 892 |
|
| 893 |
+
def _publish_console_state(self) -> None:
    """Publish the latest snapshot/state to the operator console.

    Best-effort: the console module is imported lazily inside the ``try``,
    and any failure (import or publish) is logged at debug level and
    otherwise ignored, so publishing never raises into the caller.
    """
    try:
        from open_range.server.console import publish_episode

        publish_episode(self._snapshot, self._state)
    except Exception as exc:
        # Keep the best-effort behaviour, but leave a trace for debugging
        # instead of swallowing the failure silently.
        logger.debug("Console state publish failed: %s", exc)
|
| 901 |
+
|
| 902 |
# -----------------------------------------------------------------
|
| 903 |
# Snapshot selection
|
| 904 |
# -----------------------------------------------------------------
|
|
|
|
| 1326 |
self._episode_start = time.time()
|
| 1327 |
self._episode_recorded = False
|
| 1328 |
try:
|
| 1329 |
+
from open_range.server.console import clear_episode, clear_history
|
| 1330 |
|
| 1331 |
+
clear_episode()
|
| 1332 |
clear_history()
|
| 1333 |
except Exception:
|
| 1334 |
pass
|
|
|
|
| 1379 |
len(self._snapshot.golden_path or []),
|
| 1380 |
)
|
| 1381 |
|
| 1382 |
+
self._publish_console_state()
|
| 1383 |
return RangeObservation(stdout=briefing)
|
| 1384 |
|
| 1385 |
def step(
|
|
|
|
| 1420 |
|
| 1421 |
cmd_name = _extract_command_name(action.command)
|
| 1422 |
if not cmd_name:
|
| 1423 |
+
obs = RangeObservation(
|
| 1424 |
stdout="",
|
| 1425 |
stderr="Empty command",
|
| 1426 |
done=self._state.step_count >= self._max_steps,
|
| 1427 |
)
|
| 1428 |
+
self._publish_console_state()
|
| 1429 |
+
return obs
|
| 1430 |
|
| 1431 |
# Handle meta-commands (processed by environment, not forwarded to containers)
|
| 1432 |
meta_handlers = {
|
|
|
|
| 1442 |
obs = self._apply_rewards(action, obs)
|
| 1443 |
self._check_termination(obs)
|
| 1444 |
self._report_if_done(obs)
|
| 1445 |
+
self._publish_console_state()
|
| 1446 |
return obs
|
| 1447 |
|
| 1448 |
# Route to container
|
|
|
|
| 1498 |
self._check_termination(obs)
|
| 1499 |
self._report_if_done(obs)
|
| 1500 |
|
| 1501 |
+
self._publish_console_state()
|
| 1502 |
return obs
|
| 1503 |
|
| 1504 |
@property
|
src/open_range/validator/exploitability.py
CHANGED
|
@@ -15,6 +15,16 @@ _META_COMMANDS = {"submit_flag", "submit_evidence", "submit_finding", "auth", "l
|
|
| 15 |
class ExploitabilityCheck:
|
| 16 |
"""Execute every golden-path step and verify ``expect_in_stdout`` appears."""
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
|
| 19 |
if not snapshot.golden_path:
|
| 20 |
return CheckResult(
|
|
@@ -43,12 +53,20 @@ class ExploitabilityCheck:
|
|
| 43 |
|
| 44 |
expected = step.expect_in_stdout
|
| 45 |
if not expected:
|
| 46 |
-
|
| 47 |
-
"
|
| 48 |
-
"output not validated",
|
| 49 |
-
step.step,
|
| 50 |
)
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
elif expected not in output:
|
| 53 |
failed_steps.append({
|
| 54 |
"step": step.step,
|
|
@@ -56,7 +74,9 @@ class ExploitabilityCheck:
|
|
| 56 |
"got_snippet": output[:300],
|
| 57 |
})
|
| 58 |
|
| 59 |
-
passed = len(failed_steps) == 0 and
|
|
|
|
|
|
|
| 60 |
issues: list[str] = []
|
| 61 |
if unvalidated_steps:
|
| 62 |
issues.append(
|
|
@@ -65,7 +85,7 @@ class ExploitabilityCheck:
|
|
| 65 |
error_parts: list[str] = []
|
| 66 |
if failed_steps:
|
| 67 |
error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
|
| 68 |
-
if unvalidated_steps:
|
| 69 |
error_parts.append(
|
| 70 |
f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
|
| 71 |
)
|
|
@@ -78,6 +98,7 @@ class ExploitabilityCheck:
|
|
| 78 |
"unvalidated_steps": unvalidated_steps,
|
| 79 |
"issues": issues,
|
| 80 |
"total_steps": len(snapshot.golden_path),
|
|
|
|
| 81 |
},
|
| 82 |
error="" if passed else "; ".join(error_parts),
|
| 83 |
)
|
|
|
|
| 15 |
class ExploitabilityCheck:
|
| 16 |
"""Execute every golden-path step and verify ``expect_in_stdout`` appears."""
|
| 17 |
|
| 18 |
+
def __init__(self, *, require_expectation: bool = True) -> None:
|
| 19 |
+
"""Create an exploitability check.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
require_expectation: When ``True`` (default), every non-meta golden
|
| 23 |
+
path step must define ``expect_in_stdout``. Missing expectations
|
| 24 |
+
are treated as validation failures.
|
| 25 |
+
"""
|
| 26 |
+
self.require_expectation = require_expectation
|
| 27 |
+
|
| 28 |
async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
|
| 29 |
if not snapshot.golden_path:
|
| 30 |
return CheckResult(
|
|
|
|
| 53 |
|
| 54 |
expected = step.expect_in_stdout
|
| 55 |
if not expected:
|
| 56 |
+
message = (
|
| 57 |
+
f"golden path step {step.step} has no expect_in_stdout"
|
|
|
|
|
|
|
| 58 |
)
|
| 59 |
+
if self.require_expectation:
|
| 60 |
+
failed_steps.append({
|
| 61 |
+
"step": step.step,
|
| 62 |
+
"error": message,
|
| 63 |
+
})
|
| 64 |
+
else:
|
| 65 |
+
logger.warning(
|
| 66 |
+
"exploitability: %s — output not validated",
|
| 67 |
+
message,
|
| 68 |
+
)
|
| 69 |
+
unvalidated_steps.append(step.step)
|
| 70 |
elif expected not in output:
|
| 71 |
failed_steps.append({
|
| 72 |
"step": step.step,
|
|
|
|
| 74 |
"got_snippet": output[:300],
|
| 75 |
})
|
| 76 |
|
| 77 |
+
passed = len(failed_steps) == 0 and (
|
| 78 |
+
not self.require_expectation or len(unvalidated_steps) == 0
|
| 79 |
+
)
|
| 80 |
issues: list[str] = []
|
| 81 |
if unvalidated_steps:
|
| 82 |
issues.append(
|
|
|
|
| 85 |
error_parts: list[str] = []
|
| 86 |
if failed_steps:
|
| 87 |
error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
|
| 88 |
+
if self.require_expectation and unvalidated_steps:
|
| 89 |
error_parts.append(
|
| 90 |
f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
|
| 91 |
)
|
|
|
|
| 98 |
"unvalidated_steps": unvalidated_steps,
|
| 99 |
"issues": issues,
|
| 100 |
"total_steps": len(snapshot.golden_path),
|
| 101 |
+
"require_expectation": self.require_expectation,
|
| 102 |
},
|
| 103 |
error="" if passed else "; ".join(error_parts),
|
| 104 |
)
|
tests/test_builder.py
CHANGED
|
@@ -109,22 +109,6 @@ async def test_template_builder_empty_bug_families_uses_default_pool(tier1_manif
|
|
| 109 |
assert len(spec.truth_graph.vulns) == 1
|
| 110 |
|
| 111 |
|
| 112 |
-
@pytest.mark.asyncio
|
| 113 |
-
async def test_template_builder_handles_non_schema_difficulty_bounds(tier1_manifest):
|
| 114 |
-
from open_range.builder.builder import TemplateOnlyBuilder
|
| 115 |
-
|
| 116 |
-
builder = TemplateOnlyBuilder()
|
| 117 |
-
manifest = {
|
| 118 |
-
**tier1_manifest,
|
| 119 |
-
"bug_families": ["sqli"],
|
| 120 |
-
"difficulty": {**tier1_manifest.get("difficulty", {}), "min_vulns": -2, "max_vulns": 0},
|
| 121 |
-
}
|
| 122 |
-
|
| 123 |
-
spec = await builder.build(manifest, BuildContext(seed=9, tier=1))
|
| 124 |
-
assert len(spec.truth_graph.vulns) == 1
|
| 125 |
-
assert spec.truth_graph.vulns[0].type == "sqli"
|
| 126 |
-
|
| 127 |
-
|
| 128 |
@pytest.mark.asyncio
|
| 129 |
async def test_template_builder_avoids_previous_vulns(tier1_manifest):
|
| 130 |
from open_range.builder.builder import TemplateOnlyBuilder
|
|
|
|
| 109 |
assert len(spec.truth_graph.vulns) == 1
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
@pytest.mark.asyncio
|
| 113 |
async def test_template_builder_avoids_previous_vulns(tier1_manifest):
|
| 114 |
from open_range.builder.builder import TemplateOnlyBuilder
|
tests/test_validator.py
CHANGED
|
@@ -336,8 +336,7 @@ async def test_exploitability_skips_meta_commands(mock_containers):
|
|
| 336 |
assert result.details["skipped_steps"] == [2]
|
| 337 |
|
| 338 |
|
| 339 |
-
|
| 340 |
-
async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_containers):
|
| 341 |
from open_range.validator.exploitability import ExploitabilityCheck
|
| 342 |
|
| 343 |
spec = SnapshotSpec(
|
|
@@ -345,12 +344,31 @@ async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_c
|
|
| 345 |
GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
|
| 346 |
],
|
| 347 |
)
|
| 348 |
-
mock_containers.exec_results[("attacker", "curl http://web/")] = "
|
| 349 |
|
| 350 |
result = await ExploitabilityCheck().check(spec, mock_containers)
|
| 351 |
assert result.passed is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
assert result.details["unvalidated_steps"] == [1]
|
| 353 |
-
assert "missing expect_in_stdout" in result.error
|
| 354 |
|
| 355 |
|
| 356 |
# ---------------------------------------------------------------------------
|
|
|
|
| 336 |
assert result.details["skipped_steps"] == [2]
|
| 337 |
|
| 338 |
|
| 339 |
+
async def test_exploitability_fails_when_expectation_missing_in_strict_mode(mock_containers):
|
|
|
|
| 340 |
from open_range.validator.exploitability import ExploitabilityCheck
|
| 341 |
|
| 342 |
spec = SnapshotSpec(
|
|
|
|
| 344 |
GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
|
| 345 |
],
|
| 346 |
)
|
| 347 |
+
mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
|
| 348 |
|
| 349 |
result = await ExploitabilityCheck().check(spec, mock_containers)
|
| 350 |
assert result.passed is False
|
| 351 |
+
assert result.details["require_expectation"] is True
|
| 352 |
+
assert result.details["failed_steps"][0]["error"] == (
|
| 353 |
+
"golden path step 1 has no expect_in_stdout"
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
@pytest.mark.asyncio
|
| 358 |
+
async def test_exploitability_allows_missing_expectation_in_lenient_mode(mock_containers):
|
| 359 |
+
from open_range.validator.exploitability import ExploitabilityCheck
|
| 360 |
+
|
| 361 |
+
spec = SnapshotSpec(
|
| 362 |
+
golden_path=[
|
| 363 |
+
GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
|
| 364 |
+
],
|
| 365 |
+
)
|
| 366 |
+
mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
|
| 367 |
+
|
| 368 |
+
result = await ExploitabilityCheck(require_expectation=False).check(spec, mock_containers)
|
| 369 |
+
assert result.passed is True
|
| 370 |
+
assert result.details["require_expectation"] is False
|
| 371 |
assert result.details["unvalidated_steps"] == [1]
|
|
|
|
| 372 |
|
| 373 |
|
| 374 |
# ---------------------------------------------------------------------------
|