Lars Talian committed on
Commit
595e190
·
1 Parent(s): 2f0b84d

fix(validator): add structured exec results and strict patchability outcomes

Browse files
src/open_range/protocols.py CHANGED
@@ -283,6 +283,20 @@ class CheckResult(BaseModel):
283
  advisory: bool = False # if True, failure triggers retry but never blocks
284
 
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  class ContainerSet(BaseModel):
287
  """Handle to live Docker containers for a snapshot."""
288
 
@@ -291,22 +305,35 @@ class ContainerSet(BaseModel):
291
  project_name: str = ""
292
  container_ids: dict[str, str] = Field(default_factory=dict) # service -> id
293
 
294
- async def exec(self, container: str, cmd: str, timeout: float = 30.0) -> str:
295
- """Run *cmd* inside *container* and return combined stdout+stderr."""
296
  import asyncio
297
 
298
  cid = self.container_ids.get(container, container)
299
  proc = await asyncio.create_subprocess_exec(
300
  "docker", "exec", cid, "sh", "-c", cmd,
301
  stdout=asyncio.subprocess.PIPE,
302
- stderr=asyncio.subprocess.STDOUT,
303
  )
304
  try:
305
- stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
306
  except asyncio.TimeoutError:
307
  proc.kill()
308
- return "<timeout>"
309
- return (stdout or b"").decode(errors="replace")
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  async def is_healthy(self, container: str) -> bool:
312
  """Return True when *container* is running and its healthcheck passes."""
 
283
  advisory: bool = False # if True, failure triggers retry but never blocks
284
 
285
 
286
class ExecResult(BaseModel):
    """Result of running one command: both output streams plus status flags."""

    stdout: str = ""  # standard-output text; empty by default
    stderr: str = ""  # standard-error text; empty by default
    exit_code: int = 0  # exit status; defaults to 0
    timed_out: bool = False  # defaults to False

    @property
    def combined_output(self) -> str:
        """Return the non-empty streams joined by a newline, stripped at both ends."""
        non_empty = filter(None, (self.stdout, self.stderr))
        return "\n".join(non_empty).strip()
298
+
299
+
300
  class ContainerSet(BaseModel):
301
  """Handle to live Docker containers for a snapshot."""
302
 
 
305
  project_name: str = ""
306
  container_ids: dict[str, str] = Field(default_factory=dict) # service -> id
307
 
308
async def exec_run(self, container: str, cmd: str, timeout: float = 30.0) -> ExecResult:
    """Run *cmd* inside *container* and return structured output + status.

    Args:
        container: Key into ``container_ids`` (service -> id); when no entry
            exists the value itself is passed to ``docker exec``.
        cmd: Shell command line, executed as ``sh -c <cmd>``.
        timeout: Seconds to wait for the command before giving up.

    Returns:
        An ``ExecResult`` with decoded stdout/stderr and the process exit
        code.  On timeout the result carries ``stderr="<timeout>"``,
        ``exit_code=124`` and ``timed_out=True``.
    """
    import asyncio

    cid = self.container_ids.get(container, container)
    proc = await asyncio.create_subprocess_exec(
        "docker", "exec", cid, "sh", "-c", cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError:
        # The subprocess may exit on its own between the timeout firing and
        # the kill; in that case kill() raises ProcessLookupError, which must
        # not mask the timeout result returned below.
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        # Best-effort drain so the killed subprocess is reaped and its pipes
        # are closed; any failure here is irrelevant to the reported timeout.
        try:
            await proc.communicate()
        except Exception:  # noqa: BLE001
            pass
        return ExecResult(stderr="<timeout>", exit_code=124, timed_out=True)
    return ExecResult(
        # Undecodable bytes are replaced rather than raising.
        stdout=(stdout or b"").decode(errors="replace"),
        stderr=(stderr or b"").decode(errors="replace"),
        exit_code=int(proc.returncode or 0),
    )
332
+
333
async def exec(self, container: str, cmd: str, timeout: float = 30.0) -> str:
    """Run *cmd* through ``exec_run`` and return only its ``combined_output`` string."""
    return (await self.exec_run(container, cmd, timeout=timeout)).combined_output
337
 
338
  async def is_healthy(self, container: str) -> bool:
339
  """Return True when *container* is running and its healthcheck passes."""
src/open_range/validator/_golden_path.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  from __future__ import annotations
4
 
5
- from open_range.protocols import ContainerSet, SnapshotSpec
6
 
7
 
8
  def _command_name(command: str) -> str:
@@ -12,33 +12,37 @@ def _command_name(command: str) -> str:
12
  return stripped.split()[0]
13
 
14
 
15
- async def execute_step(
 
 
 
 
16
  snapshot: SnapshotSpec,
17
  containers: ContainerSet,
18
  command: str,
19
  *,
20
  host: str = "attacker",
21
- ) -> str:
22
- """Execute a golden-path step, including environment meta-commands."""
23
  cmd_name = _command_name(command)
24
  if cmd_name == "submit_flag":
25
  parts = command.strip().split(maxsplit=1)
26
  submitted = parts[1] if len(parts) > 1 else ""
27
  valid_flags = {flag.value for flag in snapshot.flags}
28
  if submitted in valid_flags:
29
- return f"Correct! Flag accepted: {submitted}"
30
- return f"Invalid flag: {submitted}"
31
 
32
  if cmd_name == "submit_evidence":
33
- return "Evidence submitted and recorded."
34
 
35
  if cmd_name == "submit_finding":
36
- return "Finding submitted and recorded."
37
 
38
  if cmd_name == "auth":
39
  parts = command.strip().split(maxsplit=3)
40
  if len(parts) < 4:
41
- return "Usage: auth <host> <username> <password>"
42
  target_host, username, password = parts[1], parts[2], parts[3]
43
  for user in snapshot.topology.get("users", []):
44
  if (
@@ -46,13 +50,30 @@ async def execute_step(
46
  and user.get("password") == password
47
  and target_host in user.get("hosts", [])
48
  ):
49
- return f"Authenticated as {username} on {target_host}."
50
- return f"Authentication failed for {username} on {target_host}."
51
 
52
  if cmd_name == "logout":
53
  parts = command.strip().split(maxsplit=1)
54
  if len(parts) < 2:
55
- return "Usage: logout <host>"
56
- return f"Logged out from {parts[1]}."
 
 
57
 
58
- return await containers.exec(host, command)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
5
+ from open_range.protocols import ContainerSet, ExecResult, SnapshotSpec
6
 
7
 
8
  def _command_name(command: str) -> str:
 
12
  return stripped.split()[0]
13
 
14
 
15
def _ok(stdout: str) -> ExecResult:
    """Wrap *stdout* in an ``ExecResult`` whose exit code is 0."""
    return ExecResult(exit_code=0, stdout=stdout)
17
+
18
+
19
+ async def execute_step_result(
20
  snapshot: SnapshotSpec,
21
  containers: ContainerSet,
22
  command: str,
23
  *,
24
  host: str = "attacker",
25
+ ) -> ExecResult:
26
+ """Execute a golden-path step and return structured command result."""
27
  cmd_name = _command_name(command)
28
  if cmd_name == "submit_flag":
29
  parts = command.strip().split(maxsplit=1)
30
  submitted = parts[1] if len(parts) > 1 else ""
31
  valid_flags = {flag.value for flag in snapshot.flags}
32
  if submitted in valid_flags:
33
+ return _ok(f"Correct! Flag accepted: {submitted}")
34
+ return _ok(f"Invalid flag: {submitted}")
35
 
36
  if cmd_name == "submit_evidence":
37
+ return _ok("Evidence submitted and recorded.")
38
 
39
  if cmd_name == "submit_finding":
40
+ return _ok("Finding submitted and recorded.")
41
 
42
  if cmd_name == "auth":
43
  parts = command.strip().split(maxsplit=3)
44
  if len(parts) < 4:
45
+ return _ok("Usage: auth <host> <username> <password>")
46
  target_host, username, password = parts[1], parts[2], parts[3]
47
  for user in snapshot.topology.get("users", []):
48
  if (
 
50
  and user.get("password") == password
51
  and target_host in user.get("hosts", [])
52
  ):
53
+ return _ok(f"Authenticated as {username} on {target_host}.")
54
+ return _ok(f"Authentication failed for {username} on {target_host}.")
55
 
56
  if cmd_name == "logout":
57
  parts = command.strip().split(maxsplit=1)
58
  if len(parts) < 2:
59
+ return _ok("Usage: logout <host>")
60
+ return _ok(f"Logged out from {parts[1]}.")
61
+
62
+ return await containers.exec_run(host, command)
63
 
64
+
65
async def execute_step(
    snapshot: SnapshotSpec,
    containers: ContainerSet,
    command: str,
    *,
    host: str = "attacker",
) -> str:
    """Execute a golden-path step and return its output as a plain string.

    Delegates to ``execute_step_result`` and returns that result's
    ``combined_output``.
    """
    structured = await execute_step_result(snapshot, containers, command, host=host)
    return structured.combined_output
src/open_range/validator/evidence.py CHANGED
@@ -6,43 +6,6 @@ import shlex
6
 
7
  from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec
8
 
9
- _RC_SENTINEL = "__OPENRANGE_RC__:"
10
-
11
-
12
- def _with_exit_marker(command: str) -> str:
13
- """Wrap a shell command so output includes a parseable exit-code marker."""
14
- return f"{command}; rc=$?; echo {_RC_SENTINEL}$rc"
15
-
16
-
17
- def _parse_marked_output(raw: str) -> tuple[str, int]:
18
- """Parse command output and recover the embedded exit code.
19
-
20
- Falls back to best-effort inference when the marker is absent (e.g., mocks).
21
- """
22
- lines = raw.splitlines()
23
- marker_idx = -1
24
- marker_rc: int | None = None
25
-
26
- for idx in range(len(lines) - 1, -1, -1):
27
- line = lines[idx].strip()
28
- if not line.startswith(_RC_SENTINEL):
29
- continue
30
- value = line[len(_RC_SENTINEL):].strip()
31
- if value.isdigit():
32
- marker_idx = idx
33
- marker_rc = int(value)
34
- break
35
-
36
- if marker_rc is not None:
37
- payload = "\n".join(lines[:marker_idx] + lines[marker_idx + 1:]).strip()
38
- return payload, marker_rc
39
-
40
- # Fallback for test doubles that return a plain string without marker.
41
- payload = raw.strip()
42
- if payload == "" or payload.isdigit():
43
- return payload, 0
44
- return payload, 1
45
-
46
 
47
  class EvidenceCheck:
48
  """Verify all ``evidence_spec`` items exist in the running containers."""
@@ -71,35 +34,44 @@ class EvidenceCheck:
71
  try:
72
  safe_path = shlex.quote(path)
73
  if item.type in ("log_entry", "alert"):
74
- # grep for pattern in the file
75
- base_cmd = (
76
- f"grep -c {shlex.quote(pattern)} {safe_path}"
77
- if pattern
78
- else f"test -f {safe_path} && echo ok"
79
- )
80
- output, rc = _parse_marked_output(
81
- await containers.exec(host, _with_exit_marker(base_cmd))
82
- )
83
- if pattern and output.strip() in ("0", ""):
84
- missing.append({"item": item.type, "location": loc, "pattern": pattern})
85
- elif rc != 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  missing.append({
87
  "item": item.type,
88
  "location": loc,
89
- "pattern": pattern,
90
- "error": output or f"evidence command failed (exit={rc})",
91
  })
92
- else:
93
- # file existence check
94
- base_cmd = f"test -f {safe_path} && echo exists"
95
- output, rc = _parse_marked_output(
96
- await containers.exec(host, _with_exit_marker(base_cmd))
97
- )
98
- if rc != 0 or "exists" not in output:
99
- detail = {"item": item.type, "location": loc}
100
- if rc != 0 and output:
101
- detail["error"] = output
102
- missing.append(detail)
103
  except Exception as exc: # noqa: BLE001
104
  missing.append({"item": item.type, "location": loc, "error": str(exc)})
105
 
 
6
 
7
  from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class EvidenceCheck:
11
  """Verify all ``evidence_spec`` items exist in the running containers."""
 
34
  try:
35
  safe_path = shlex.quote(path)
36
  if item.type in ("log_entry", "alert"):
37
+ if pattern:
38
+ result = await containers.exec_run(
39
+ host,
40
+ f"grep -c {shlex.quote(pattern)} {safe_path}",
41
+ )
42
+ output = result.stdout.strip()
43
+ if result.exit_code != 0:
44
+ missing.append({
45
+ "item": item.type,
46
+ "location": loc,
47
+ "pattern": pattern,
48
+ "error": result.combined_output
49
+ or f"evidence command failed (exit={result.exit_code})",
50
+ })
51
+ elif output in ("0", ""):
52
+ missing.append({
53
+ "item": item.type,
54
+ "location": loc,
55
+ "pattern": pattern,
56
+ })
57
+ else:
58
+ result = await containers.exec_run(host, f"test -f {safe_path}")
59
+ if result.exit_code != 0:
60
+ missing.append({
61
+ "item": item.type,
62
+ "location": loc,
63
+ "error": result.combined_output
64
+ or f"missing evidence file (exit={result.exit_code})",
65
+ })
66
+ else:
67
+ result = await containers.exec_run(host, f"test -f {safe_path}")
68
+ if result.exit_code != 0:
69
  missing.append({
70
  "item": item.type,
71
  "location": loc,
72
+ "error": result.combined_output
73
+ or f"missing evidence file (exit={result.exit_code})",
74
  })
 
 
 
 
 
 
 
 
 
 
 
75
  except Exception as exc: # noqa: BLE001
76
  missing.append({"item": item.type, "location": loc, "error": str(exc)})
77
 
src/open_range/validator/exploitability.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
  import logging
6
 
7
  from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec
8
- from open_range.validator._golden_path import execute_step
9
 
10
  logger = logging.getLogger(__name__)
11
 
@@ -43,13 +43,21 @@ class ExploitabilityCheck:
43
  continue
44
  host = getattr(step, "host", None) or "attacker"
45
  try:
46
- output = await execute_step(snapshot, containers, step.command, host=host)
47
  except Exception as exc: # noqa: BLE001
48
  failed_steps.append({
49
  "step": step.step,
50
  "error": str(exc),
51
  })
52
  continue
 
 
 
 
 
 
 
 
53
 
54
  expected = step.expect_in_stdout
55
  if not expected:
 
5
  import logging
6
 
7
  from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec
8
+ from open_range.validator._golden_path import execute_step_result
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
43
  continue
44
  host = getattr(step, "host", None) or "attacker"
45
  try:
46
+ result = await execute_step_result(snapshot, containers, step.command, host=host)
47
  except Exception as exc: # noqa: BLE001
48
  failed_steps.append({
49
  "step": step.step,
50
  "error": str(exc),
51
  })
52
  continue
53
+ if result.exit_code != 0:
54
+ failed_steps.append({
55
+ "step": step.step,
56
+ "error": f"command failed with exit_code={result.exit_code}",
57
+ "got_snippet": result.combined_output[:300],
58
+ })
59
+ continue
60
+ output = result.combined_output
61
 
62
  expected = step.expect_in_stdout
63
  if not expected:
src/open_range/validator/patchability.py CHANGED
@@ -11,7 +11,7 @@ import logging
11
  import re
12
 
13
  from open_range.protocols import CheckResult, ContainerSet, ExploitStep, SnapshotSpec
14
- from open_range.validator._golden_path import execute_step
15
 
16
  logger = logging.getLogger(__name__)
17
 
@@ -126,7 +126,7 @@ class PatchabilityCheck:
126
 
127
  # --- Apply remediation -------------------------------------------------
128
  try:
129
- await containers.exec(host, vuln.remediation)
130
  except Exception as exc: # noqa: BLE001
131
  results.append({"vuln": vuln.id, "error": f"remediation failed: {exc}"})
132
  all_ok = False
@@ -136,26 +136,63 @@ class PatchabilityCheck:
136
  except Exception: # noqa: BLE001
137
  pass
138
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # --- Re-run linked golden-path step — must now fail --------------------
141
  step_still_works = False
 
 
142
  matched_any = False
143
  for chain_step in linked_steps:
144
  gp_step = _find_golden_step(snapshot, chain_step)
145
  if gp_step is None:
146
  continue
147
  matched_any = True
 
 
 
 
 
 
 
148
  try:
149
- output = await execute_step(
150
  snapshot,
151
  containers,
152
  gp_step.command,
153
  host=getattr(gp_step, "host", None) or "attacker",
154
  )
155
- except Exception: # noqa: BLE001
156
- continue # exec failure counts as "step failed" — good
157
-
158
- if gp_step.expect_in_stdout and gp_step.expect_in_stdout in output:
 
 
 
 
 
 
 
 
 
 
 
 
159
  step_still_works = True
160
 
161
  if not matched_any:
@@ -172,7 +209,15 @@ class PatchabilityCheck:
172
 
173
  tested_count += 1
174
 
175
- if step_still_works:
 
 
 
 
 
 
 
 
176
  results.append({
177
  "vuln": vuln.id,
178
  "passed": False,
 
11
  import re
12
 
13
  from open_range.protocols import CheckResult, ContainerSet, ExploitStep, SnapshotSpec
14
+ from open_range.validator._golden_path import execute_step_result
15
 
16
  logger = logging.getLogger(__name__)
17
 
 
126
 
127
  # --- Apply remediation -------------------------------------------------
128
  try:
129
+ remediation_result = await containers.exec_run(host, vuln.remediation)
130
  except Exception as exc: # noqa: BLE001
131
  results.append({"vuln": vuln.id, "error": f"remediation failed: {exc}"})
132
  all_ok = False
 
136
  except Exception: # noqa: BLE001
137
  pass
138
  continue
139
+ if remediation_result.exit_code != 0:
140
+ results.append({
141
+ "vuln": vuln.id,
142
+ "passed": False,
143
+ "reason": (
144
+ "remediation command failed "
145
+ f"(exit_code={remediation_result.exit_code})"
146
+ ),
147
+ "output_snippet": remediation_result.combined_output[:300],
148
+ })
149
+ all_ok = False
150
+ try:
151
+ await containers.restart(host)
152
+ except Exception: # noqa: BLE001
153
+ pass
154
+ continue
155
 
156
  # --- Re-run linked golden-path step — must now fail --------------------
157
  step_still_works = False
158
+ step_inconclusive = False
159
+ inconclusive_details: list[dict[str, object]] = []
160
  matched_any = False
161
  for chain_step in linked_steps:
162
  gp_step = _find_golden_step(snapshot, chain_step)
163
  if gp_step is None:
164
  continue
165
  matched_any = True
166
+ if not gp_step.expect_in_stdout:
167
+ step_inconclusive = True
168
+ inconclusive_details.append({
169
+ "step": gp_step.step,
170
+ "reason": "missing expect_in_stdout for retest step",
171
+ })
172
+ continue
173
  try:
174
+ result = await execute_step_result(
175
  snapshot,
176
  containers,
177
  gp_step.command,
178
  host=getattr(gp_step, "host", None) or "attacker",
179
  )
180
+ except Exception as exc: # noqa: BLE001
181
+ step_inconclusive = True
182
+ inconclusive_details.append({
183
+ "step": gp_step.step,
184
+ "reason": f"retest execution raised: {exc}",
185
+ })
186
+ continue
187
+ if result.exit_code != 0:
188
+ step_inconclusive = True
189
+ inconclusive_details.append({
190
+ "step": gp_step.step,
191
+ "reason": f"retest command failed (exit_code={result.exit_code})",
192
+ "output_snippet": result.combined_output[:300],
193
+ })
194
+ continue
195
+ if gp_step.expect_in_stdout in result.combined_output:
196
  step_still_works = True
197
 
198
  if not matched_any:
 
209
 
210
  tested_count += 1
211
 
212
+ if step_inconclusive:
213
+ results.append({
214
+ "vuln": vuln.id,
215
+ "passed": False,
216
+ "reason": "retest inconclusive after remediation",
217
+ "details": inconclusive_details,
218
+ })
219
+ all_ok = False
220
+ elif step_still_works:
221
  results.append({
222
  "vuln": vuln.id,
223
  "passed": False,
src/open_range/validator/reward_grounding.py CHANGED
@@ -82,11 +82,20 @@ class RewardGroundingCheck:
82
  f"-e {shlex.quote(query)}"
83
  )
84
  try:
85
- output = await containers.exec(host, mysql_cmd)
86
- output = output.strip()
87
  except Exception as exc: # noqa: BLE001
88
  bad.append({"flag": flag.id, "error": str(exc)})
89
  continue
 
 
 
 
 
 
 
 
 
 
90
 
91
  if flag.value not in output:
92
  bad.append({
@@ -106,11 +115,20 @@ class RewardGroundingCheck:
106
  continue
107
 
108
  try:
109
- output = await containers.exec(host, f"cat -- {shlex.quote(path)}")
110
- output = output.strip()
111
  except Exception as exc: # noqa: BLE001
112
  bad.append({"flag": flag.id, "error": str(exc)})
113
  continue
 
 
 
 
 
 
 
 
 
 
114
 
115
  if flag.value not in output:
116
  bad.append({
 
82
  f"-e {shlex.quote(query)}"
83
  )
84
  try:
85
+ result = await containers.exec_run(host, mysql_cmd)
 
86
  except Exception as exc: # noqa: BLE001
87
  bad.append({"flag": flag.id, "error": str(exc)})
88
  continue
89
+ if result.exit_code != 0:
90
+ bad.append({
91
+ "flag": flag.id,
92
+ "error": (
93
+ result.combined_output
94
+ or f"mysql command failed (exit_code={result.exit_code})"
95
+ ),
96
+ })
97
+ continue
98
+ output = result.stdout.strip() or result.combined_output.strip()
99
 
100
  if flag.value not in output:
101
  bad.append({
 
115
  continue
116
 
117
  try:
118
+ result = await containers.exec_run(host, f"cat -- {shlex.quote(path)}")
 
119
  except Exception as exc: # noqa: BLE001
120
  bad.append({"flag": flag.id, "error": str(exc)})
121
  continue
122
+ if result.exit_code != 0:
123
+ bad.append({
124
+ "flag": flag.id,
125
+ "error": (
126
+ result.combined_output
127
+ or f"cat command failed (exit_code={result.exit_code})"
128
+ ),
129
+ })
130
+ continue
131
+ output = result.stdout.strip() or result.combined_output.strip()
132
 
133
  if flag.value not in output:
134
  bad.append({
tests/conftest.py CHANGED
@@ -176,14 +176,31 @@ def mock_containers():
176
  class MockContainerSet:
177
  def __init__(self):
178
  self.exec_results = {} # {(container, cmd_fragment): output}
 
179
  self.healthy = set()
180
  self.restarted = [] # track restart calls: list of container names
181
 
182
- async def exec(self, container: str, cmd: str, **kwargs) -> str:
183
- for (c, pattern), result in self.exec_results.items():
 
184
  if c == container and pattern in cmd:
185
  return result
186
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  async def is_healthy(self, container: str) -> bool:
189
  return container in self.healthy
 
176
  class MockContainerSet:
177
  def __init__(self):
178
  self.exec_results = {} # {(container, cmd_fragment): output}
179
+ self.exec_status = {} # {(container, cmd_fragment): exit_code}
180
  self.healthy = set()
181
  self.restarted = [] # track restart calls: list of container names
182
 
183
+ @staticmethod
184
+ def _lookup(mapping, container: str, cmd: str):
185
+ for (c, pattern), result in mapping.items():
186
  if c == container and pattern in cmd:
187
  return result
188
+ return None
189
+
190
async def exec_run(self, container: str, cmd: str, **kwargs):
    """Build an ``ExecResult`` from the configured ``exec_results`` / ``exec_status``.

    The matched text goes to ``stdout`` when the exit code is 0 and to
    ``stderr`` otherwise; a missing status means exit code 0 and a missing
    (or non-string) output means empty text.
    """
    from open_range.protocols import ExecResult

    matched_output = self._lookup(self.exec_results, container, cmd)
    matched_status = self._lookup(self.exec_status, container, cmd)
    body = matched_output if isinstance(matched_output, str) else ""
    exit_code = 0 if matched_status is None else int(matched_status)
    if exit_code != 0:
        return ExecResult(stderr=body, exit_code=exit_code)
    return ExecResult(stdout=body, exit_code=0)
200
+
201
async def exec(self, container: str, cmd: str, **kwargs) -> str:
    """Return the ``combined_output`` of ``exec_run`` for the same arguments."""
    return (await self.exec_run(container, cmd, **kwargs)).combined_output
204
 
205
  async def is_healthy(self, container: str) -> bool:
206
  return container in self.healthy
tests/test_validator.py CHANGED
@@ -336,6 +336,7 @@ async def test_exploitability_skips_meta_commands(mock_containers):
336
  assert result.details["skipped_steps"] == [2]
337
 
338
 
 
339
  async def test_exploitability_fails_when_expectation_missing_in_strict_mode(mock_containers):
340
  from open_range.validator.exploitability import ExploitabilityCheck
341
 
@@ -371,6 +372,23 @@ async def test_exploitability_allows_missing_expectation_in_lenient_mode(mock_co
371
  assert result.details["unvalidated_steps"] == [1]
372
 
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  # ---------------------------------------------------------------------------
375
  # Check 3: Patchability
376
  # ---------------------------------------------------------------------------
@@ -463,6 +481,82 @@ async def test_patchability_fails_when_exploit_still_works(mock_containers):
463
  assert "exploitable after remediation" in result.error
464
 
465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  @pytest.mark.asyncio
467
  async def test_patchability_skips_prose_remediation(mock_containers):
468
  """Non-executable remediation (prose) is skipped with warning, fails if all skipped."""
@@ -632,13 +726,14 @@ async def test_evidence_fails_when_grep_returns_error_text(mock_containers):
632
  ]
633
  )
634
  mock_containers.exec_results[("siem", "grep")] = "grep: /var/log/missing.log: No such file or directory"
 
635
  result = await EvidenceCheck().check(spec, mock_containers)
636
  assert result.passed is False
637
  assert "No such file or directory" in result.details["missing"][0]["error"]
638
 
639
 
640
  @pytest.mark.asyncio
641
- async def test_evidence_fails_on_nonzero_exit_marker_even_when_output_present(mock_containers):
642
  from open_range.validator.evidence import EvidenceCheck
643
 
644
  spec = SnapshotSpec(
@@ -646,7 +741,7 @@ async def test_evidence_fails_on_nonzero_exit_marker_even_when_output_present(mo
646
  EvidenceItem(type="artifact", location="siem:/var/log/test.log"),
647
  ]
648
  )
649
- mock_containers.exec_results[("siem", "test -f")] = "exists\n__OPENRANGE_RC__:1"
650
  result = await EvidenceCheck().check(spec, mock_containers)
651
  assert result.passed is False
652
  assert result.details["missing"][0]["location"] == "siem:/var/log/test.log"
@@ -742,9 +837,14 @@ async def test_reward_grounding_quotes_filesystem_path():
742
  def __init__(self):
743
  self.calls: list[tuple[str, str]] = []
744
 
745
- async def exec(self, container: str, cmd: str, **kwargs) -> str:
 
 
746
  self.calls.append((container, cmd))
747
- return "FLAG{abc}"
 
 
 
748
 
749
  containers = RecordingContainers()
750
  spec = SnapshotSpec(
@@ -765,9 +865,14 @@ async def test_reward_grounding_rejects_invalid_db_identifier_path():
765
  def __init__(self):
766
  self.calls: list[tuple[str, str]] = []
767
 
768
- async def exec(self, container: str, cmd: str, **kwargs) -> str:
 
 
769
  self.calls.append((container, cmd))
770
- return "FLAG{abc}"
 
 
 
771
 
772
  containers = RecordingContainers()
773
  spec = SnapshotSpec(
@@ -797,9 +902,14 @@ async def test_reward_grounding_quotes_mysql_password_from_snapshot():
797
  def __init__(self):
798
  self.calls: list[tuple[str, str]] = []
799
 
800
- async def exec(self, container: str, cmd: str, **kwargs) -> str:
 
 
801
  self.calls.append((container, cmd))
802
- return "FLAG{abc}"
 
 
 
803
 
804
  containers = RecordingContainers()
805
  password = "pa ss;$(id)"
 
336
  assert result.details["skipped_steps"] == [2]
337
 
338
 
339
+ @pytest.mark.asyncio
340
  async def test_exploitability_fails_when_expectation_missing_in_strict_mode(mock_containers):
341
  from open_range.validator.exploitability import ExploitabilityCheck
342
 
 
372
  assert result.details["unvalidated_steps"] == [1]
373
 
374
 
375
@pytest.mark.asyncio
async def test_exploitability_fails_on_nonzero_exit_even_with_expected_output(mock_containers):
    """A step whose output matches but whose exit code is non-zero fails the check."""
    from open_range.validator.exploitability import ExploitabilityCheck

    command = "curl http://web/"
    exec_key = ("attacker", command)
    spec = SnapshotSpec(
        golden_path=[
            GoldenPathStep(step=1, command=command, expect_in_stdout="Welcome"),
        ],
    )
    # Output contains the expected text, but the command reports failure.
    mock_containers.exec_results[exec_key] = "Welcome"
    mock_containers.exec_status[exec_key] = 7

    result = await ExploitabilityCheck().check(spec, mock_containers)

    assert result.passed is False
    first_failure = result.details["failed_steps"][0]
    assert first_failure["error"] == "command failed with exit_code=7"
390
+
391
+
392
  # ---------------------------------------------------------------------------
393
  # Check 3: Patchability
394
  # ---------------------------------------------------------------------------
 
481
  assert "exploitable after remediation" in result.error
482
 
483
 
484
@pytest.mark.asyncio
async def test_patchability_fails_when_remediation_command_exits_nonzero(mock_containers):
    """A remediation command that exits non-zero makes the vuln result fail."""
    from open_range.protocols import ExploitStep
    from open_range.validator.patchability import PatchabilityCheck

    exploit_cmd = "curl http://web/search?q=exploit"
    vuln = Vulnerability(
        id="v1",
        type="sqli",
        host="web",
        remediation="sed -i 's/unsafe/safe/' /var/www/app.php",
    )
    truth = TruthGraph(
        vulns=[vuln],
        exploit_chain=[ExploitStep(vuln_id="v1", command=exploit_cmd)],
    )
    golden_step = GoldenPathStep(
        step=1,
        command=exploit_cmd,
        expect_in_stdout="SECRET_DATA",
    )
    spec = SnapshotSpec(truth_graph=truth, golden_path=[golden_step])

    # The remediation (sed) fails on the target host.
    sed_key = ("web", "sed")
    mock_containers.exec_results[sed_key] = "sed: cannot read /var/www/app.php"
    mock_containers.exec_status[sed_key] = 2

    result = await PatchabilityCheck().check(spec, mock_containers)

    assert result.passed is False
    first = result.details["vuln_results"][0]
    assert first["passed"] is False
    assert "remediation command failed" in first["reason"]
519
+
520
+
521
@pytest.mark.asyncio
async def test_patchability_fails_when_retest_command_is_inconclusive(mock_containers):
    """A retest command that exits non-zero is reported as inconclusive, not as patched."""
    from open_range.protocols import ExploitStep
    from open_range.validator.patchability import PatchabilityCheck

    exploit_cmd = "curl http://web/search?q=exploit"
    vuln = Vulnerability(
        id="v1",
        type="sqli",
        host="web",
        remediation="sed -i 's/unsafe/safe/' /var/www/app.php",
    )
    truth = TruthGraph(
        vulns=[vuln],
        exploit_chain=[ExploitStep(vuln_id="v1", command=exploit_cmd)],
    )
    golden_step = GoldenPathStep(
        step=1,
        command=exploit_cmd,
        expect_in_stdout="SECRET_DATA",
    )
    spec = SnapshotSpec(truth_graph=truth, golden_path=[golden_step])

    # Remediation succeeds silently; the retest itself then fails to run cleanly.
    retest_key = ("attacker", exploit_cmd)
    mock_containers.exec_results[("web", "sed")] = ""
    mock_containers.exec_results[retest_key] = "curl: (7) failed to connect"
    mock_containers.exec_status[retest_key] = 7

    result = await PatchabilityCheck().check(spec, mock_containers)

    assert result.passed is False
    vuln_result = result.details["vuln_results"][0]
    assert vuln_result["passed"] is False
    assert vuln_result["reason"] == "retest inconclusive after remediation"
558
+
559
+
560
  @pytest.mark.asyncio
561
  async def test_patchability_skips_prose_remediation(mock_containers):
562
  """Non-executable remediation (prose) is skipped with warning, fails if all skipped."""
 
726
  ]
727
  )
728
  mock_containers.exec_results[("siem", "grep")] = "grep: /var/log/missing.log: No such file or directory"
729
+ mock_containers.exec_status[("siem", "grep")] = 2
730
  result = await EvidenceCheck().check(spec, mock_containers)
731
  assert result.passed is False
732
  assert "No such file or directory" in result.details["missing"][0]["error"]
733
 
734
 
735
  @pytest.mark.asyncio
736
+ async def test_evidence_fails_on_nonzero_exit_even_when_output_present(mock_containers):
737
  from open_range.validator.evidence import EvidenceCheck
738
 
739
  spec = SnapshotSpec(
 
741
  EvidenceItem(type="artifact", location="siem:/var/log/test.log"),
742
  ]
743
  )
744
+ mock_containers.exec_status[("siem", "test -f")] = 1
745
  result = await EvidenceCheck().check(spec, mock_containers)
746
  assert result.passed is False
747
  assert result.details["missing"][0]["location"] == "siem:/var/log/test.log"
 
837
  def __init__(self):
838
  self.calls: list[tuple[str, str]] = []
839
 
840
+ async def exec_run(self, container: str, cmd: str, **kwargs):
841
+ from open_range.protocols import ExecResult
842
+
843
  self.calls.append((container, cmd))
844
+ return ExecResult(stdout="FLAG{abc}", exit_code=0)
845
+
846
+ async def exec(self, container: str, cmd: str, **kwargs) -> str:
847
+ return (await self.exec_run(container, cmd, **kwargs)).combined_output
848
 
849
  containers = RecordingContainers()
850
  spec = SnapshotSpec(
 
865
  def __init__(self):
866
  self.calls: list[tuple[str, str]] = []
867
 
868
+ async def exec_run(self, container: str, cmd: str, **kwargs):
869
+ from open_range.protocols import ExecResult
870
+
871
  self.calls.append((container, cmd))
872
+ return ExecResult(stdout="FLAG{abc}", exit_code=0)
873
+
874
+ async def exec(self, container: str, cmd: str, **kwargs) -> str:
875
+ return (await self.exec_run(container, cmd, **kwargs)).combined_output
876
 
877
  containers = RecordingContainers()
878
  spec = SnapshotSpec(
 
902
  def __init__(self):
903
  self.calls: list[tuple[str, str]] = []
904
 
905
+ async def exec_run(self, container: str, cmd: str, **kwargs):
906
+ from open_range.protocols import ExecResult
907
+
908
  self.calls.append((container, cmd))
909
+ return ExecResult(stdout="FLAG{abc}", exit_code=0)
910
+
911
+ async def exec(self, container: str, cmd: str, **kwargs) -> str:
912
+ return (await self.exec_run(container, cmd, **kwargs)).combined_output
913
 
914
  containers = RecordingContainers()
915
  password = "pa ss;$(id)"