Lars Talian committed on
Commit
016a288
·
2 Parent(s): 8fcf8b7 6f0f018

Merge remote-tracking branch 'origin/main' into codex/issue-79-81-20260308

Browse files
Dockerfile CHANGED
@@ -91,6 +91,10 @@ ENV OPENRANGE_SNAPSHOT_POOL_SIZE=1
91
  # Enable the OpenEnv Gradio web interface at /web
92
  ENV ENABLE_WEB_INTERFACE=true
93
 
 
 
 
 
94
  HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
95
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
96
 
 
91
  # Enable the OpenEnv Gradio web interface at /web
92
  ENV ENABLE_WEB_INTERFACE=true
93
 
94
+ # Clear any pre-existing snapshots so runtime always generates fresh ones
95
+ # with current service specs from service_manifest.py
96
+ RUN rm -rf /app/env/snapshots/* 2>/dev/null || true
97
+
98
  HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
99
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
100
 
src/open_range/builder/service_manifest.py CHANGED
@@ -56,6 +56,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
56
  [
57
  "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
58
  "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
 
 
59
  ],
60
  "mysqld --user=mysql --log-error={log_dir}/mysql.log &",
61
  ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
@@ -66,6 +68,8 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
66
  [
67
  "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
68
  "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
 
 
69
  ],
70
  "mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
71
  ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
@@ -100,7 +104,10 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
100
  "rsyslog": (
101
  "rsyslogd",
102
  ["rsyslog"],
103
- [],
 
 
 
104
  "rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
105
  ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
106
  ),
@@ -118,7 +125,11 @@ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
118
  "postfix": (
119
  "master",
120
  ["postfix"],
121
- [],
 
 
 
 
122
  "postfix start > {log_dir}/postfix.log 2>&1 || true",
123
  ReadinessCheck(type="tcp", port=25, timeout_s=10),
124
  ),
 
56
  [
57
  "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
58
  "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
59
+ # Ensure data directory is initialized (idempotent)
60
+ "test -d /var/lib/mysql/mysql || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
61
  ],
62
  "mysqld --user=mysql --log-error={log_dir}/mysql.log &",
63
  ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
 
68
  [
69
  "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
70
  "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
71
+ # Ensure data directory is initialized (idempotent)
72
+ "test -d /var/lib/mysql/mysql || mariadb-install-db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || mysql_install_db --user=mysql --datadir=/var/lib/mysql 2>/dev/null || true",
73
  ],
74
  "mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
75
  ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
 
104
  "rsyslog": (
105
  "rsyslogd",
106
  ["rsyslog"],
107
+ [
108
+ # Disable imklog (kernel log) — not available in containers
109
+ "sed -i '/imklog/s/^/#/' /etc/rsyslog.conf 2>/dev/null || true",
110
+ ],
111
  "rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
112
  ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
113
  ),
 
125
  "postfix": (
126
  "master",
127
  ["postfix"],
128
+ [
129
+ # Ensure aliases DB exists and fix chroot dirs
130
+ "newaliases 2>/dev/null || true",
131
+ "mkdir -p /var/spool/postfix/pid 2>/dev/null || true",
132
+ ],
133
  "postfix start > {log_dir}/postfix.log 2>&1 || true",
134
  ReadinessCheck(type="tcp", port=25, timeout_s=10),
135
  ),
src/open_range/server/environment.py CHANGED
@@ -15,6 +15,7 @@ from __future__ import annotations
15
  import logging
16
  import os
17
  import re
 
18
  import shlex
19
  import socket
20
  import subprocess as sp
@@ -23,6 +24,30 @@ import urllib.request
23
  from typing import TYPE_CHECKING, Any
24
  from uuid import uuid4
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  from openenv.core.env_server.interfaces import Environment
27
  from openenv.core.env_server.types import EnvironmentMetadata
28
 
@@ -543,8 +568,8 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
543
  def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
544
  """Start services based on snapshot spec (subprocess mode only).
545
 
546
- The snapshot's ``services`` list is normally populated by the Renderer.
547
- Older snapshots fall back to topology-derived service specs.
548
  """
549
  if self._execution_mode != "subprocess":
550
  return
@@ -615,7 +640,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
615
  else svc.start_command
616
  )
617
 
618
- # Run init commands
619
  for cmd in init_commands:
620
  try:
621
  result = sp.run(
@@ -625,6 +650,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
625
  text=True,
626
  env=env,
627
  check=False,
 
628
  )
629
  if result.returncode != 0 and result.stderr:
630
  logger.debug(
@@ -634,15 +660,20 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
634
  except Exception as exc:
635
  logger.warning("Init command failed for %s: %s", svc.daemon, exc)
636
 
637
- # Start the daemon
 
 
 
 
638
  try:
639
  result = sp.run(
640
- ["bash", "-c", start_command],
641
  capture_output=True,
642
  timeout=30,
643
  text=True,
644
  env=env,
645
  check=False,
 
646
  )
647
  if result.returncode != 0 and result.stderr:
648
  logger.debug(
@@ -859,6 +890,15 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
859
  except Exception as exc:
860
  logger.debug("NPC traffic log refresh failed: %s", exc)
861
 
 
 
 
 
 
 
 
 
 
862
  # -----------------------------------------------------------------
863
  # Snapshot selection
864
  # -----------------------------------------------------------------
@@ -1286,8 +1326,9 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1286
  self._episode_start = time.time()
1287
  self._episode_recorded = False
1288
  try:
1289
- from open_range.server.console import clear_history
1290
 
 
1291
  clear_history()
1292
  except Exception:
1293
  pass
@@ -1338,6 +1379,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1338
  len(self._snapshot.golden_path or []),
1339
  )
1340
 
 
1341
  return RangeObservation(stdout=briefing)
1342
 
1343
  def step(
@@ -1378,11 +1420,13 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1378
 
1379
  cmd_name = _extract_command_name(action.command)
1380
  if not cmd_name:
1381
- return RangeObservation(
1382
  stdout="",
1383
  stderr="Empty command",
1384
  done=self._state.step_count >= self._max_steps,
1385
  )
 
 
1386
 
1387
  # Handle meta-commands (processed by environment, not forwarded to containers)
1388
  meta_handlers = {
@@ -1398,6 +1442,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1398
  obs = self._apply_rewards(action, obs)
1399
  self._check_termination(obs)
1400
  self._report_if_done(obs)
 
1401
  return obs
1402
 
1403
  # Route to container
@@ -1453,6 +1498,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1453
  self._check_termination(obs)
1454
  self._report_if_done(obs)
1455
 
 
1456
  return obs
1457
 
1458
  @property
 
15
  import logging
16
  import os
17
  import re
18
+ import signal
19
  import shlex
20
  import socket
21
  import subprocess as sp
 
24
  from typing import TYPE_CHECKING, Any
25
  from uuid import uuid4
26
 
27
+
28
+ def _install_zombie_reaper() -> None:
29
+ """Install SIGCHLD handler to reap orphaned child processes.
30
+
31
+ When Python runs as PID 1 (e.g. in Docker containers), it doesn't
32
+ automatically reap zombie children. This handler ensures service
33
+ daemons started via subprocess don't accumulate as zombies.
34
+ """
35
+ def _reap_children(signum: int, frame: Any) -> None:
36
+ while True:
37
+ try:
38
+ pid, _ = os.waitpid(-1, os.WNOHANG)
39
+ if pid == 0:
40
+ break
41
+ except ChildProcessError:
42
+ break
43
+
44
+ signal.signal(signal.SIGCHLD, _reap_children)
45
+
46
+
47
+ # Install at import time so it's active before any service starts
48
+ if os.getpid() == 1:
49
+ _install_zombie_reaper()
50
+
51
  from openenv.core.env_server.interfaces import Environment
52
  from openenv.core.env_server.types import EnvironmentMetadata
53
 
 
568
  def _start_snapshot_services(self, snapshot: SnapshotSpec) -> None:
569
  """Start services based on snapshot spec (subprocess mode only).
570
 
571
+ The snapshot's ``services`` list is normally populated by the renderer.
572
+ Snapshots without explicit service specs skip subprocess provisioning.
573
  """
574
  if self._execution_mode != "subprocess":
575
  return
 
640
  else svc.start_command
641
  )
642
 
643
+ # Run init commands (isolated from PID 1's process group)
644
  for cmd in init_commands:
645
  try:
646
  result = sp.run(
 
650
  text=True,
651
  env=env,
652
  check=False,
653
+ start_new_session=True,
654
  )
655
  if result.returncode != 0 and result.stderr:
656
  logger.debug(
 
660
  except Exception as exc:
661
  logger.warning("Init command failed for %s: %s", svc.daemon, exc)
662
 
663
+ # Start the daemon in a new session so it cannot send signals to
664
+ # PID 1 (uvicorn). Ensure the command is backgrounded.
665
+ effective_cmd = start_command
666
+ if not effective_cmd.rstrip().endswith("&"):
667
+ effective_cmd = f"({effective_cmd}) &"
668
  try:
669
  result = sp.run(
670
+ ["bash", "-c", effective_cmd],
671
  capture_output=True,
672
  timeout=30,
673
  text=True,
674
  env=env,
675
  check=False,
676
+ start_new_session=True,
677
  )
678
  if result.returncode != 0 and result.stderr:
679
  logger.debug(
 
890
  except Exception as exc:
891
  logger.debug("NPC traffic log refresh failed: %s", exc)
892
 
893
+ def _publish_console_state(self) -> None:
894
+ """Publish the latest snapshot/state to the operator console."""
895
+ try:
896
+ from open_range.server.console import publish_episode
897
+
898
+ publish_episode(self._snapshot, self._state)
899
+ except Exception:
900
+ pass
901
+
902
  # -----------------------------------------------------------------
903
  # Snapshot selection
904
  # -----------------------------------------------------------------
 
1326
  self._episode_start = time.time()
1327
  self._episode_recorded = False
1328
  try:
1329
+ from open_range.server.console import clear_episode, clear_history
1330
 
1331
+ clear_episode()
1332
  clear_history()
1333
  except Exception:
1334
  pass
 
1379
  len(self._snapshot.golden_path or []),
1380
  )
1381
 
1382
+ self._publish_console_state()
1383
  return RangeObservation(stdout=briefing)
1384
 
1385
  def step(
 
1420
 
1421
  cmd_name = _extract_command_name(action.command)
1422
  if not cmd_name:
1423
+ obs = RangeObservation(
1424
  stdout="",
1425
  stderr="Empty command",
1426
  done=self._state.step_count >= self._max_steps,
1427
  )
1428
+ self._publish_console_state()
1429
+ return obs
1430
 
1431
  # Handle meta-commands (processed by environment, not forwarded to containers)
1432
  meta_handlers = {
 
1442
  obs = self._apply_rewards(action, obs)
1443
  self._check_termination(obs)
1444
  self._report_if_done(obs)
1445
+ self._publish_console_state()
1446
  return obs
1447
 
1448
  # Route to container
 
1498
  self._check_termination(obs)
1499
  self._report_if_done(obs)
1500
 
1501
+ self._publish_console_state()
1502
  return obs
1503
 
1504
  @property
src/open_range/validator/exploitability.py CHANGED
@@ -15,6 +15,16 @@ _META_COMMANDS = {"submit_flag", "submit_evidence", "submit_finding", "auth", "l
15
  class ExploitabilityCheck:
16
  """Execute every golden-path step and verify ``expect_in_stdout`` appears."""
17
 
 
 
 
 
 
 
 
 
 
 
18
  async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
19
  if not snapshot.golden_path:
20
  return CheckResult(
@@ -43,12 +53,20 @@ class ExploitabilityCheck:
43
 
44
  expected = step.expect_in_stdout
45
  if not expected:
46
- logger.warning(
47
- "exploitability: golden path step %d has no expect_in_stdout"
48
- "output not validated",
49
- step.step,
50
  )
51
- unvalidated_steps.append(step.step)
 
 
 
 
 
 
 
 
 
 
52
  elif expected not in output:
53
  failed_steps.append({
54
  "step": step.step,
@@ -56,7 +74,9 @@ class ExploitabilityCheck:
56
  "got_snippet": output[:300],
57
  })
58
 
59
- passed = len(failed_steps) == 0 and len(unvalidated_steps) == 0
 
 
60
  issues: list[str] = []
61
  if unvalidated_steps:
62
  issues.append(
@@ -65,7 +85,7 @@ class ExploitabilityCheck:
65
  error_parts: list[str] = []
66
  if failed_steps:
67
  error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
68
- if unvalidated_steps:
69
  error_parts.append(
70
  f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
71
  )
@@ -78,6 +98,7 @@ class ExploitabilityCheck:
78
  "unvalidated_steps": unvalidated_steps,
79
  "issues": issues,
80
  "total_steps": len(snapshot.golden_path),
 
81
  },
82
  error="" if passed else "; ".join(error_parts),
83
  )
 
15
  class ExploitabilityCheck:
16
  """Execute every golden-path step and verify ``expect_in_stdout`` appears."""
17
 
18
+ def __init__(self, *, require_expectation: bool = True) -> None:
19
+ """Create an exploitability check.
20
+
21
+ Args:
22
+ require_expectation: When ``True`` (default), every non-meta golden
23
+ path step must define ``expect_in_stdout``. Missing expectations
24
+ are treated as validation failures.
25
+ """
26
+ self.require_expectation = require_expectation
27
+
28
  async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
29
  if not snapshot.golden_path:
30
  return CheckResult(
 
53
 
54
  expected = step.expect_in_stdout
55
  if not expected:
56
+ message = (
57
+ f"golden path step {step.step} has no expect_in_stdout"
 
 
58
  )
59
+ if self.require_expectation:
60
+ failed_steps.append({
61
+ "step": step.step,
62
+ "error": message,
63
+ })
64
+ else:
65
+ logger.warning(
66
+ "exploitability: %s — output not validated",
67
+ message,
68
+ )
69
+ unvalidated_steps.append(step.step)
70
  elif expected not in output:
71
  failed_steps.append({
72
  "step": step.step,
 
74
  "got_snippet": output[:300],
75
  })
76
 
77
+ passed = len(failed_steps) == 0 and (
78
+ not self.require_expectation or len(unvalidated_steps) == 0
79
+ )
80
  issues: list[str] = []
81
  if unvalidated_steps:
82
  issues.append(
 
85
  error_parts: list[str] = []
86
  if failed_steps:
87
  error_parts.append(f"{len(failed_steps)} golden-path step(s) failed")
88
+ if self.require_expectation and unvalidated_steps:
89
  error_parts.append(
90
  f"{len(unvalidated_steps)} golden-path step(s) missing expect_in_stdout"
91
  )
 
98
  "unvalidated_steps": unvalidated_steps,
99
  "issues": issues,
100
  "total_steps": len(snapshot.golden_path),
101
+ "require_expectation": self.require_expectation,
102
  },
103
  error="" if passed else "; ".join(error_parts),
104
  )
tests/test_builder.py CHANGED
@@ -109,22 +109,6 @@ async def test_template_builder_empty_bug_families_uses_default_pool(tier1_manif
109
  assert len(spec.truth_graph.vulns) == 1
110
 
111
 
112
- @pytest.mark.asyncio
113
- async def test_template_builder_handles_non_schema_difficulty_bounds(tier1_manifest):
114
- from open_range.builder.builder import TemplateOnlyBuilder
115
-
116
- builder = TemplateOnlyBuilder()
117
- manifest = {
118
- **tier1_manifest,
119
- "bug_families": ["sqli"],
120
- "difficulty": {**tier1_manifest.get("difficulty", {}), "min_vulns": -2, "max_vulns": 0},
121
- }
122
-
123
- spec = await builder.build(manifest, BuildContext(seed=9, tier=1))
124
- assert len(spec.truth_graph.vulns) == 1
125
- assert spec.truth_graph.vulns[0].type == "sqli"
126
-
127
-
128
  @pytest.mark.asyncio
129
  async def test_template_builder_avoids_previous_vulns(tier1_manifest):
130
  from open_range.builder.builder import TemplateOnlyBuilder
 
109
  assert len(spec.truth_graph.vulns) == 1
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  @pytest.mark.asyncio
113
  async def test_template_builder_avoids_previous_vulns(tier1_manifest):
114
  from open_range.builder.builder import TemplateOnlyBuilder
tests/test_validator.py CHANGED
@@ -336,8 +336,7 @@ async def test_exploitability_skips_meta_commands(mock_containers):
336
  assert result.details["skipped_steps"] == [2]
337
 
338
 
339
- @pytest.mark.asyncio
340
- async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_containers):
341
  from open_range.validator.exploitability import ExploitabilityCheck
342
 
343
  spec = SnapshotSpec(
@@ -345,12 +344,31 @@ async def test_exploitability_fails_when_non_meta_step_has_no_expectation(mock_c
345
  GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
346
  ],
347
  )
348
- mock_containers.exec_results[("attacker", "curl http://web/")] = "ok"
349
 
350
  result = await ExploitabilityCheck().check(spec, mock_containers)
351
  assert result.passed is False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  assert result.details["unvalidated_steps"] == [1]
353
- assert "missing expect_in_stdout" in result.error
354
 
355
 
356
  # ---------------------------------------------------------------------------
 
336
  assert result.details["skipped_steps"] == [2]
337
 
338
 
339
+ async def test_exploitability_fails_when_expectation_missing_in_strict_mode(mock_containers):
 
340
  from open_range.validator.exploitability import ExploitabilityCheck
341
 
342
  spec = SnapshotSpec(
 
344
  GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
345
  ],
346
  )
347
+ mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
348
 
349
  result = await ExploitabilityCheck().check(spec, mock_containers)
350
  assert result.passed is False
351
+ assert result.details["require_expectation"] is True
352
+ assert result.details["failed_steps"][0]["error"] == (
353
+ "golden path step 1 has no expect_in_stdout"
354
+ )
355
+
356
+
357
+ @pytest.mark.asyncio
358
+ async def test_exploitability_allows_missing_expectation_in_lenient_mode(mock_containers):
359
+ from open_range.validator.exploitability import ExploitabilityCheck
360
+
361
+ spec = SnapshotSpec(
362
+ golden_path=[
363
+ GoldenPathStep(step=1, command="curl http://web/", expect_in_stdout=""),
364
+ ],
365
+ )
366
+ mock_containers.exec_results[("attacker", "curl http://web/")] = "Welcome"
367
+
368
+ result = await ExploitabilityCheck(require_expectation=False).check(spec, mock_containers)
369
+ assert result.passed is True
370
+ assert result.details["require_expectation"] is False
371
  assert result.details["unvalidated_steps"] == [1]
 
372
 
373
 
374
  # ---------------------------------------------------------------------------