Aaron Brown committed on
Commit
eaa2876
·
1 Parent(s): 50e0b84

Merge upstream changes, resolve conflicts

Browse files

- Take local validator profile system over upstream simplified version
- 698/700 tests passing (2 pre-existing upstream lineage metadata failures)

src/open_range/server/app.py CHANGED
@@ -24,6 +24,7 @@ def create_app() -> FastAPI:
24
  RangeObservation,
25
  env_name="open_range",
26
  )
 
27
  app.state.runtime = runtime
28
  app.add_event_handler("startup", runtime.start)
29
  app.add_event_handler("shutdown", runtime.stop)
 
24
  RangeObservation,
25
  env_name="open_range",
26
  )
27
+ app.state.env = env_factory()
28
  app.state.runtime = runtime
29
  app.add_event_handler("startup", runtime.start)
30
  app.add_event_handler("shutdown", runtime.stop)
src/open_range/server/environment.py CHANGED
@@ -207,7 +207,10 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
207
  return "", f"Execution error: {exc}"
208
 
209
  def _exec_in_container(
210
- self, container_name: str, command: str
 
 
 
211
  ) -> tuple[str, str]:
212
  """Execute a command inside a Docker container.
213
 
@@ -219,7 +222,11 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
219
  """
220
  # Subprocess execution mode
221
  if self._execution_mode == "subprocess":
222
- return self._exec_via_subprocess(container_name, command, self._exec_timeout)
 
 
 
 
223
 
224
  # Mock mode for unit tests (docker_available explicitly set to False)
225
  if self._docker_available is False:
@@ -234,6 +241,19 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
234
  return "", "Docker unavailable and execution_mode is not 'subprocess'"
235
  try:
236
  container = client.containers.get(container_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  result = container.exec_run(
238
  ["sh", "-c", command],
239
  demux=True,
@@ -718,6 +738,12 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
718
  self._npc_traffic_log = []
719
  self._episode_start = time.time()
720
  self._episode_recorded = False
 
 
 
 
 
 
721
 
722
  # Deploy snapshot artifacts to running containers
723
  self._apply_snapshot(self._snapshot)
@@ -818,7 +844,11 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
818
  # Route to container
819
  target = self._resolve_target(action)
820
  timeout = timeout_s or self._exec_timeout
821
- stdout, stderr = self._exec_in_container(target, action.command)
 
 
 
 
822
 
823
  # Log action for cross-role reward coupling
824
  action_record = {
@@ -833,6 +863,12 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
833
  self._red_history.append(action_record)
834
  else:
835
  self._blue_history.append(action_record)
 
 
 
 
 
 
836
 
837
  # Check for milestone completion (#17)
838
  milestone = self._check_milestone(stdout)
 
207
  return "", f"Execution error: {exc}"
208
 
209
  def _exec_in_container(
210
+ self,
211
+ container_name: str,
212
+ command: str,
213
+ timeout_s: float | None = None,
214
  ) -> tuple[str, str]:
215
  """Execute a command inside a Docker container.
216
 
 
222
  """
223
  # Subprocess execution mode
224
  if self._execution_mode == "subprocess":
225
+ return self._exec_via_subprocess(
226
+ container_name,
227
+ command,
228
+ timeout_s if timeout_s is not None else self._exec_timeout,
229
+ )
230
 
231
  # Mock mode for unit tests (docker_available explicitly set to False)
232
  if self._docker_available is False:
 
241
  return "", "Docker unavailable and execution_mode is not 'subprocess'"
242
  try:
243
  container = client.containers.get(container_name)
244
+ if timeout_s is not None:
245
+ try:
246
+ result = sp.run(
247
+ ["docker", "exec", container.name, "sh", "-c", command],
248
+ capture_output=True,
249
+ timeout=timeout_s,
250
+ text=True,
251
+ check=False,
252
+ )
253
+ return result.stdout, result.stderr
254
+ except sp.TimeoutExpired:
255
+ return "", f"Command timed out after {timeout_s}s"
256
+
257
  result = container.exec_run(
258
  ["sh", "-c", command],
259
  demux=True,
 
738
  self._npc_traffic_log = []
739
  self._episode_start = time.time()
740
  self._episode_recorded = False
741
+ try:
742
+ from open_range.server.console import clear_history
743
+
744
+ clear_history()
745
+ except Exception:
746
+ pass
747
 
748
  # Deploy snapshot artifacts to running containers
749
  self._apply_snapshot(self._snapshot)
 
844
  # Route to container
845
  target = self._resolve_target(action)
846
  timeout = timeout_s or self._exec_timeout
847
+ stdout, stderr = self._exec_in_container(
848
+ target,
849
+ action.command,
850
+ timeout_s=timeout,
851
+ )
852
 
853
  # Log action for cross-role reward coupling
854
  action_record = {
 
863
  self._red_history.append(action_record)
864
  else:
865
  self._blue_history.append(action_record)
866
+ try:
867
+ from open_range.server.console import record_action
868
+
869
+ record_action({"mode": action.mode, **action_record})
870
+ except Exception:
871
+ pass
872
 
873
  # Check for milestone completion (#17)
874
  milestone = self._check_milestone(stdout)
src/open_range/server/runtime.py CHANGED
@@ -11,7 +11,10 @@ import asyncio
11
  import json
12
  import logging
13
  import os
 
14
  import shutil
 
 
15
  import threading
16
  import time
17
  from dataclasses import dataclass, field
@@ -32,14 +35,28 @@ from open_range.protocols import (
32
  SnapshotSpec,
33
  )
34
  from open_range.server.models import RangeState
35
- from open_range.validator.graph_consistency import GraphConsistencyCheck
36
- from open_range.validator.manifest_compliance import ManifestComplianceCheck
 
 
 
 
 
 
 
37
  from open_range.validator.task_feasibility import TaskFeasibilityCheck
38
  from open_range.validator.validator import ValidationResult, ValidatorGate
39
 
40
  logger = logging.getLogger(__name__)
41
 
42
  _DEFAULT_MANIFEST = ("manifests", "tier1_basic.yaml")
 
 
 
 
 
 
 
43
 
44
 
45
  def _env_flag(name: str, default: bool = False) -> bool:
@@ -244,15 +261,39 @@ def _default_builder() -> SnapshotBuilder:
244
  )
245
 
246
 
247
- def _default_validator(manifest: dict[str, Any]) -> ValidatorGate:
248
- # These checks work directly against the compiled snapshot spec and do not
249
- # require booted containers. They are the safe default for shipped mode.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  return ValidatorGate(
251
  [
252
- ManifestComplianceCheck(manifest),
253
- GraphConsistencyCheck(),
254
- StructuralSnapshotCheck(),
 
 
 
255
  TaskFeasibilityCheck(),
 
 
 
256
  ]
257
  )
258
 
@@ -268,6 +309,7 @@ class ManagedSnapshotRuntime:
268
  store_dir: str | Path | None = None,
269
  builder: SnapshotBuilder | None = None,
270
  validator: ValidatorGate | None = None,
 
271
  pool_size: int = 3,
272
  selection_strategy: str = "random",
273
  refill_enabled: bool = False,
@@ -284,7 +326,10 @@ class ManagedSnapshotRuntime:
284
  self.store = SnapshotStore(str(self.store_dir))
285
  self.builder = builder or _default_builder()
286
  self.mutator = Mutator(self.builder)
287
- self.validator = validator or _default_validator(self.manifest)
 
 
 
288
  self.renderer = SnapshotRenderer()
289
  self.curriculum = CurriculumTracker()
290
  self.pool_size = max(1, pool_size)
@@ -304,6 +349,7 @@ class ManagedSnapshotRuntime:
304
  return cls(
305
  manifest_path=os.getenv("OPENRANGE_RUNTIME_MANIFEST"),
306
  store_dir=os.getenv("OPENRANGE_SNAPSHOT_DIR"),
 
307
  pool_size=_env_int("OPENRANGE_SNAPSHOT_POOL_SIZE", 3),
308
  selection_strategy=os.getenv("OPENRANGE_SNAPSHOT_SELECTION", "random"),
309
  refill_enabled=_env_flag("OPENRANGE_ENABLE_MANAGED_REFILL", default=False),
@@ -388,6 +434,7 @@ class ManagedSnapshotRuntime:
388
  "store_dir": str(self.store_dir),
389
  "pool_size": self.pool_size,
390
  "selection_strategy": self.selection_strategy,
 
391
  "refill_enabled": self.refill_enabled,
392
  "snapshot_count": self.snapshot_count(),
393
  "started": self._started,
@@ -456,14 +503,11 @@ class ManagedSnapshotRuntime:
456
  last_error: str | None = None
457
  for attempt in range(1, self.generation_retries + 1):
458
  context = self._build_context()
459
- parent_entry = self._select_parent_entry()
460
  snapshot = _run_coro_sync(
461
  self.mutator.mutate(
462
  self.manifest,
463
  context=context,
464
  error={"message": last_error} if last_error else None,
465
- parent_snapshot=parent_entry.snapshot if parent_entry else None,
466
- parent_snapshot_id=parent_entry.snapshot_id if parent_entry else None,
467
  )
468
  )
469
  validation = self._validate_snapshot(snapshot)
@@ -501,7 +545,194 @@ class ManagedSnapshotRuntime:
501
  return context
502
 
503
  def _validate_snapshot(self, snapshot: SnapshotSpec) -> ValidationResult:
504
- return _run_coro_sync(self.validator.validate(snapshot, ContainerSet()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  @staticmethod
507
  def _validation_error(result: ValidationResult) -> str:
@@ -523,11 +754,6 @@ class ManagedSnapshotRuntime:
523
  prefix = "snap_" + "_".join(vuln_types[:3]) if vuln_types else "snap_generated"
524
  return f"{prefix}_{int(time.time() * 1000)}"
525
 
526
- def _select_parent_entry(self):
527
- if self.snapshot_count() == 0:
528
- return None
529
- return _run_coro_sync(self.store.select_entry(strategy=self.selection_strategy))
530
-
531
  def _snapshot_dir(self, snapshot_id: str) -> Path:
532
  return self.store_dir / snapshot_id
533
 
@@ -544,9 +770,6 @@ class ManagedSnapshotRuntime:
544
  topology = dict(rendered.topology)
545
  topology["snapshot_id"] = snapshot_id
546
  rendered.topology = topology
547
- rendered.lineage.snapshot_id = snapshot_id
548
- if not rendered.lineage.root_snapshot_id:
549
- rendered.lineage.root_snapshot_id = snapshot_id
550
 
551
  snapshot_dir = self._snapshot_dir(snapshot_id)
552
  artifacts_dir = self._artifacts_dir(snapshot_id)
 
11
  import json
12
  import logging
13
  import os
14
+ import shlex
15
  import shutil
16
+ import subprocess as sp
17
+ import tempfile
18
  import threading
19
  import time
20
  from dataclasses import dataclass, field
 
35
  SnapshotSpec,
36
  )
37
  from open_range.server.models import RangeState
38
+ from open_range.validator.build_boot import BuildBootCheck
39
+ from open_range.validator.difficulty import DifficultyCheck
40
+ from open_range.validator.evidence import EvidenceCheck
41
+ from open_range.validator.exploitability import ExploitabilityCheck
42
+ from open_range.validator.isolation import IsolationCheck
43
+ from open_range.validator.npc_consistency import NPCConsistencyCheck
44
+ from open_range.validator.patchability import PatchabilityCheck
45
+ from open_range.validator.realism_review import RealismReviewCheck
46
+ from open_range.validator.reward_grounding import RewardGroundingCheck
47
  from open_range.validator.task_feasibility import TaskFeasibilityCheck
48
  from open_range.validator.validator import ValidationResult, ValidatorGate
49
 
50
  logger = logging.getLogger(__name__)
51
 
52
  _DEFAULT_MANIFEST = ("manifests", "tier1_basic.yaml")
53
+ _VALIDATOR_PROFILE_ALIASES = {
54
+ "light": "offline",
55
+ "static": "offline",
56
+ "full": "training",
57
+ "strict": "training",
58
+ }
59
+ _LIVE_VALIDATOR_PROFILES = {"training"}
60
 
61
 
62
  def _env_flag(name: str, default: bool = False) -> bool:
 
261
  )
262
 
263
 
264
+ def _normalize_validator_profile(profile: str | None) -> str:
265
+ normalized = (profile or "offline").strip().lower()
266
+ normalized = _VALIDATOR_PROFILE_ALIASES.get(normalized, normalized)
267
+ if normalized not in {"offline", "training"}:
268
+ raise ValueError(
269
+ f"Unsupported validator profile {profile!r}. "
270
+ "Expected 'offline' or 'training'."
271
+ )
272
+ return normalized
273
+
274
+
275
+ def _build_validator(profile: str) -> ValidatorGate:
276
+ normalized = _normalize_validator_profile(profile)
277
+ if normalized == "offline":
278
+ return ValidatorGate(
279
+ [
280
+ StructuralSnapshotCheck(),
281
+ TaskFeasibilityCheck(),
282
+ ]
283
+ )
284
+
285
  return ValidatorGate(
286
  [
287
+ BuildBootCheck(),
288
+ ExploitabilityCheck(),
289
+ PatchabilityCheck(),
290
+ EvidenceCheck(),
291
+ RewardGroundingCheck(),
292
+ IsolationCheck(),
293
  TaskFeasibilityCheck(),
294
+ DifficultyCheck(),
295
+ NPCConsistencyCheck(),
296
+ RealismReviewCheck(),
297
  ]
298
  )
299
 
 
309
  store_dir: str | Path | None = None,
310
  builder: SnapshotBuilder | None = None,
311
  validator: ValidatorGate | None = None,
312
+ validator_profile: str | None = None,
313
  pool_size: int = 3,
314
  selection_strategy: str = "random",
315
  refill_enabled: bool = False,
 
326
  self.store = SnapshotStore(str(self.store_dir))
327
  self.builder = builder or _default_builder()
328
  self.mutator = Mutator(self.builder)
329
+ self.validator_profile = _normalize_validator_profile(
330
+ validator_profile or os.getenv("OPENRANGE_RUNTIME_VALIDATOR_PROFILE", "offline")
331
+ )
332
+ self.validator = validator or _build_validator(self.validator_profile)
333
  self.renderer = SnapshotRenderer()
334
  self.curriculum = CurriculumTracker()
335
  self.pool_size = max(1, pool_size)
 
349
  return cls(
350
  manifest_path=os.getenv("OPENRANGE_RUNTIME_MANIFEST"),
351
  store_dir=os.getenv("OPENRANGE_SNAPSHOT_DIR"),
352
+ validator_profile=os.getenv("OPENRANGE_RUNTIME_VALIDATOR_PROFILE", "offline"),
353
  pool_size=_env_int("OPENRANGE_SNAPSHOT_POOL_SIZE", 3),
354
  selection_strategy=os.getenv("OPENRANGE_SNAPSHOT_SELECTION", "random"),
355
  refill_enabled=_env_flag("OPENRANGE_ENABLE_MANAGED_REFILL", default=False),
 
434
  "store_dir": str(self.store_dir),
435
  "pool_size": self.pool_size,
436
  "selection_strategy": self.selection_strategy,
437
+ "validator_profile": self.validator_profile,
438
  "refill_enabled": self.refill_enabled,
439
  "snapshot_count": self.snapshot_count(),
440
  "started": self._started,
 
503
  last_error: str | None = None
504
  for attempt in range(1, self.generation_retries + 1):
505
  context = self._build_context()
 
506
  snapshot = _run_coro_sync(
507
  self.mutator.mutate(
508
  self.manifest,
509
  context=context,
510
  error={"message": last_error} if last_error else None,
 
 
511
  )
512
  )
513
  validation = self._validate_snapshot(snapshot)
 
545
  return context
546
 
547
  def _validate_snapshot(self, snapshot: SnapshotSpec) -> ValidationResult:
548
+ if self.validator_profile not in _LIVE_VALIDATOR_PROFILES:
549
+ return _run_coro_sync(self.validator.validate(snapshot, ContainerSet()))
550
+ return self._validate_snapshot_live(snapshot)
551
+
552
+ def _validate_snapshot_live(self, snapshot: SnapshotSpec) -> ValidationResult:
553
+ snapshot_id = self._snapshot_id(snapshot)
554
+ project_name = self._project_name(snapshot_id)
555
+
556
+ with tempfile.TemporaryDirectory(prefix=f"openrange-validate-{snapshot_id}-") as temp_dir:
557
+ snapshot_dir = Path(temp_dir)
558
+ rendered = snapshot.model_copy(deep=True)
559
+ topology = dict(rendered.topology)
560
+ topology["snapshot_id"] = snapshot_id
561
+ rendered.topology = topology
562
+ self.renderer.render(rendered, snapshot_dir)
563
+
564
+ compose_file = snapshot_dir / "docker-compose.yml"
565
+ up_result = self._compose_up(snapshot_dir, compose_file, project_name)
566
+ if up_result is not None:
567
+ return up_result
568
+
569
+ try:
570
+ containers = self._discover_containers(project_name)
571
+ self._deploy_snapshot_artifacts(rendered, containers, snapshot_dir)
572
+ return _run_coro_sync(self.validator.validate(rendered, containers))
573
+ except Exception as exc: # noqa: BLE001
574
+ return ValidationResult(
575
+ passed=False,
576
+ checks=[
577
+ CheckResult(
578
+ name="live_validation",
579
+ passed=False,
580
+ error=str(exc),
581
+ )
582
+ ],
583
+ )
584
+ finally:
585
+ self._compose_down(snapshot_dir, compose_file, project_name)
586
+
587
+ def _project_name(self, snapshot_id: str) -> str:
588
+ safe = "".join(ch if ch.isalnum() else "-" for ch in snapshot_id.lower()).strip("-")
589
+ safe = safe[:40] or "snapshot"
590
+ return f"openrange-{safe}"
591
+
592
+ def _compose_up(
593
+ self,
594
+ snapshot_dir: Path,
595
+ compose_file: Path,
596
+ project_name: str,
597
+ ) -> ValidationResult | None:
598
+ try:
599
+ proc = sp.run(
600
+ [
601
+ "docker",
602
+ "compose",
603
+ "-p",
604
+ project_name,
605
+ "-f",
606
+ str(compose_file),
607
+ "up",
608
+ "-d",
609
+ "--build",
610
+ ],
611
+ cwd=str(snapshot_dir),
612
+ capture_output=True,
613
+ text=True,
614
+ timeout=300,
615
+ check=False,
616
+ )
617
+ except FileNotFoundError as exc:
618
+ return ValidationResult(
619
+ passed=False,
620
+ checks=[CheckResult(name="build_boot", passed=False, error=str(exc))],
621
+ )
622
+ except sp.TimeoutExpired:
623
+ return ValidationResult(
624
+ passed=False,
625
+ checks=[
626
+ CheckResult(
627
+ name="build_boot",
628
+ passed=False,
629
+ error="docker compose up timed out after 300s",
630
+ )
631
+ ],
632
+ )
633
+
634
+ if proc.returncode != 0:
635
+ error = (proc.stderr or proc.stdout or "").strip() or "docker compose up failed"
636
+ return ValidationResult(
637
+ passed=False,
638
+ checks=[CheckResult(name="build_boot", passed=False, error=error)],
639
+ )
640
+ return None
641
+
642
+ def _compose_down(self, snapshot_dir: Path, compose_file: Path, project_name: str) -> None:
643
+ try:
644
+ sp.run(
645
+ [
646
+ "docker",
647
+ "compose",
648
+ "-p",
649
+ project_name,
650
+ "-f",
651
+ str(compose_file),
652
+ "down",
653
+ "-v",
654
+ "--remove-orphans",
655
+ ],
656
+ cwd=str(snapshot_dir),
657
+ capture_output=True,
658
+ text=True,
659
+ timeout=120,
660
+ check=False,
661
+ )
662
+ except Exception: # noqa: BLE001
663
+ logger.warning("Failed to tear down validation project %s", project_name)
664
+
665
+ def _discover_containers(self, project_name: str) -> ContainerSet:
666
+ proc = sp.run(
667
+ [
668
+ "docker",
669
+ "ps",
670
+ "--filter",
671
+ f"label=com.docker.compose.project={project_name}",
672
+ "--format",
673
+ "{{.Label \"com.docker.compose.service\"}} {{.Names}}",
674
+ ],
675
+ capture_output=True,
676
+ text=True,
677
+ timeout=30,
678
+ check=False,
679
+ )
680
+ if proc.returncode != 0:
681
+ raise RuntimeError(proc.stderr.strip() or "docker ps failed")
682
+
683
+ container_ids: dict[str, str] = {}
684
+ for line in proc.stdout.splitlines():
685
+ service, _, container_name = line.partition(" ")
686
+ if service and container_name:
687
+ container_ids[service.strip()] = container_name.strip()
688
+
689
+ if not container_ids:
690
+ raise RuntimeError(f"no running containers found for project {project_name}")
691
+ return ContainerSet(project_name=project_name, container_ids=container_ids)
692
+
693
+ def _deploy_snapshot_artifacts(
694
+ self,
695
+ snapshot: SnapshotSpec,
696
+ containers: ContainerSet,
697
+ snapshot_dir: Path,
698
+ ) -> None:
699
+ _run_coro_sync(self._deploy_snapshot_artifacts_async(snapshot, containers, snapshot_dir))
700
+
701
+ async def _deploy_snapshot_artifacts_async(
702
+ self,
703
+ snapshot: SnapshotSpec,
704
+ containers: ContainerSet,
705
+ snapshot_dir: Path,
706
+ ) -> None:
707
+ if not snapshot.files:
708
+ return
709
+
710
+ for key, content in snapshot.files.items():
711
+ if key == "db:sql":
712
+ sql_file = snapshot_dir / "_snapshot.sql"
713
+ sql_file.write_text(content, encoding="utf-8")
714
+ try:
715
+ await containers.cp("db", str(sql_file), "/tmp/_snapshot.sql")
716
+ await containers.exec("db", "mysql -u root -pr00tP@ss! < /tmp/_snapshot.sql")
717
+ await containers.exec("db", "rm -f /tmp/_snapshot.sql")
718
+ finally:
719
+ sql_file.unlink(missing_ok=True)
720
+ continue
721
+
722
+ if ":" not in key:
723
+ logger.warning("Skipping file with bad key format during validation: %s", key)
724
+ continue
725
+
726
+ host, path = key.split(":", 1)
727
+ parent_dir = path.rsplit("/", 1)[0] if "/" in path else "/"
728
+ await containers.exec(host, f"mkdir -p {shlex.quote(parent_dir)}")
729
+
730
+ temp_file = snapshot_dir / f"_artifact_{host}_{abs(hash(key))}"
731
+ temp_file.write_text(content, encoding="utf-8")
732
+ try:
733
+ await containers.cp(host, str(temp_file), path)
734
+ finally:
735
+ temp_file.unlink(missing_ok=True)
736
 
737
  @staticmethod
738
  def _validation_error(result: ValidationResult) -> str:
 
754
  prefix = "snap_" + "_".join(vuln_types[:3]) if vuln_types else "snap_generated"
755
  return f"{prefix}_{int(time.time() * 1000)}"
756
 
 
 
 
 
 
757
  def _snapshot_dir(self, snapshot_id: str) -> Path:
758
  return self.store_dir / snapshot_id
759
 
 
770
  topology = dict(rendered.topology)
771
  topology["snapshot_id"] = snapshot_id
772
  rendered.topology = topology
 
 
 
773
 
774
  snapshot_dir = self._snapshot_dir(snapshot_id)
775
  artifacts_dir = self._artifacts_dir(snapshot_id)
tests/test_agents.py CHANGED
@@ -315,6 +315,7 @@ class MockRangeEnvironment:
315
  done = self._step_count >= self._max_steps
316
  return RangeObservation(
317
  stdout=f"[mock] output for: {action.command}",
 
318
  done=done,
319
  reward=0.0,
320
  )
@@ -377,6 +378,28 @@ class TestRunEpisode:
377
  assert len(result.blue_trajectory) >= 1
378
  assert "command" in result.red_trajectory[0]
379
  assert "stdout" in result.red_trajectory[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
  def test_model_names_propagated(self):
382
  from open_range.agents.episode import run_episode
 
315
  done = self._step_count >= self._max_steps
316
  return RangeObservation(
317
  stdout=f"[mock] output for: {action.command}",
318
+ alerts=["scan detected"] if getattr(action, "mode", "") == "red" else [],
319
  done=done,
320
  reward=0.0,
321
  )
 
378
  assert len(result.blue_trajectory) >= 1
379
  assert "command" in result.red_trajectory[0]
380
  assert "stdout" in result.red_trajectory[0]
381
+ assert result.blue_trajectory[0]["alerts"] == []
382
+
383
+ def test_blue_receives_structured_observation(self):
384
+ from open_range.agents.episode import run_episode
385
+
386
+ class CaptureAgent(ScriptedAgent):
387
+ def __init__(self, commands):
388
+ super().__init__(commands=commands)
389
+ self.observations = []
390
+
391
+ def act(self, observation):
392
+ self.observations.append(observation)
393
+ return super().act(observation)
394
+
395
+ red = ScriptedAgent(commands=["nmap -sV 10.0.1.0/24"])
396
+ blue = CaptureAgent(commands=["grep logs"])
397
+ env = MockRangeEnvironment(max_steps=2)
398
+
399
+ run_episode(env, red, blue, max_steps=2)
400
+ assert blue.observations
401
+ assert hasattr(blue.observations[0], "stdout")
402
+ assert blue.observations[0].alerts == ["scan detected"]
403
 
404
  def test_model_names_propagated(self):
405
  from open_range.agents.episode import run_episode
tests/test_builder.py CHANGED
@@ -104,25 +104,14 @@ async def test_template_builder_has_task_briefings(tier1_manifest):
104
 
105
 
106
  @pytest.mark.asyncio
107
- async def test_mutator_builds_child_snapshot_with_lineage(tier1_manifest):
108
  from open_range.builder.builder import TemplateOnlyBuilder
109
- from open_range.builder.mutator import Mutator
110
-
111
- mutator = Mutator(TemplateOnlyBuilder())
112
- root = await mutator.mutate(tier1_manifest, context=BuildContext(seed=1, tier=1))
113
- child = await mutator.mutate(
114
- tier1_manifest,
115
- context=BuildContext(seed=2, tier=1),
116
- parent_snapshot=root,
117
- parent_snapshot_id="root_snap",
118
- )
119
-
120
- assert child.lineage.parent_snapshot_id == "root_snap"
121
- assert child.lineage.generation_depth == 1
122
- assert child.mutation_plan is not None
123
- assert child.mutation_plan.parent_snapshot_id == "root_snap"
124
- assert child.mutation_plan.ops
125
- assert child.lineage.mutation_summary
126
 
127
 
128
  # ---------------------------------------------------------------------------
 
104
 
105
 
106
  @pytest.mark.asyncio
107
+ async def test_template_builder_preserves_manifest_tier_and_difficulty(tier2_manifest):
108
  from open_range.builder.builder import TemplateOnlyBuilder
109
+
110
+ builder = TemplateOnlyBuilder()
111
+ ctx = BuildContext(seed=42, tier=2)
112
+ spec = await builder.build(tier2_manifest, ctx)
113
+ assert spec.topology["tier"] == tier2_manifest["tier"]
114
+ assert spec.topology["difficulty"] == tier2_manifest["difficulty"]
 
 
 
 
 
 
 
 
 
 
 
115
 
116
 
117
  # ---------------------------------------------------------------------------
tests/test_console.py CHANGED
@@ -159,6 +159,16 @@ class TestHistoryAPI:
159
  assert "time" in data[0]
160
  assert isinstance(data[0]["time"], float)
161
 
 
 
 
 
 
 
 
 
 
 
162
  def test_history_max_20(self, client: TestClient):
163
  """History API should return at most 20 entries."""
164
  import time
 
159
  assert "time" in data[0]
160
  assert isinstance(data[0]["time"], float)
161
 
162
+ def test_history_updates_from_environment_steps(self, client: TestClient, env: RangeEnvironment):
163
+ from open_range.server.models import RangeAction
164
+
165
+ env.reset()
166
+ env.step(RangeAction(command="nmap -sV web", mode="red"))
167
+ data = client.get("/console/api/history").json()
168
+ assert len(data) == 1
169
+ assert data[0]["command"] == "nmap -sV web"
170
+ assert data[0]["mode"] == "red"
171
+
172
  def test_history_max_20(self, client: TestClient):
173
  """History API should return at most 20 entries."""
174
  import time
tests/test_environment.py CHANGED
@@ -123,6 +123,21 @@ class TestBlueStep:
123
  obs = env.step(RangeAction(command="", mode="blue"))
124
  assert obs.stderr != ""
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  class TestFlagSubmission:
128
  """Flag submission triggers correct rewards."""
 
123
  obs = env.step(RangeAction(command="", mode="blue"))
124
  assert obs.stderr != ""
125
 
126
+ def test_step_passes_timeout_override_to_executor(self):
127
+ env = RangeEnvironment(docker_available=False)
128
+ env.reset()
129
+ seen = {}
130
+
131
+ def fake_exec(container_name, command, timeout_s=None):
132
+ seen["container_name"] = container_name
133
+ seen["command"] = command
134
+ seen["timeout_s"] = timeout_s
135
+ return "ok", ""
136
+
137
+ env._exec_in_container = fake_exec # type: ignore[method-assign]
138
+ env.step(RangeAction(command="nmap -sV web", mode="red"), timeout_s=7.5)
139
+ assert seen["timeout_s"] == 7.5
140
+
141
 
142
  class TestFlagSubmission:
143
  """Flag submission triggers correct rewards."""
tests/test_parse_llm_response.py CHANGED
@@ -104,6 +104,7 @@ class TestRealLLMOutput:
104
  # The real LLM output uses "cmd" field name
105
  assert spec.golden_path[0].command == "nmap -p 80 10.0.1.10"
106
  assert spec.golden_path[0].expect_in_stdout == "80/tcp open"
 
107
 
108
  def test_task_briefings(self, llm_json):
109
  spec = _parse_llm_response(llm_json)
@@ -1071,4 +1072,17 @@ class TestRoundtrip:
1071
  assert spec.task.red_briefing == "Hack the network."
1072
  # files: explicit + vulnerable_code dict
1073
  assert "web:/var/www/index.php" in spec.files
1074
- assert "web:search.php" in spec.files # from vulnerable_code dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # The real LLM output uses "cmd" field name
105
  assert spec.golden_path[0].command == "nmap -p 80 10.0.1.10"
106
  assert spec.golden_path[0].expect_in_stdout == "80/tcp open"
107
+ assert spec.golden_path[0].host == "attacker"
108
 
109
  def test_task_briefings(self, llm_json):
110
  spec = _parse_llm_response(llm_json)
 
1072
  assert spec.task.red_briefing == "Hack the network."
1073
  # files: explicit + vulnerable_code dict
1074
  assert "web:/var/www/index.php" in spec.files
1075
+
1076
+ def test_golden_path_host_is_preserved(self):
1077
+ raw = _minimal_json(
1078
+ golden_path=[
1079
+ {
1080
+ "step": 1,
1081
+ "cmd": "ssh db 'cat /var/flags/flag1.txt'",
1082
+ "expect_stdout": "FLAG{db}",
1083
+ "host": "jumpbox",
1084
+ }
1085
+ ]
1086
+ )
1087
+ spec = _parse_llm_response(raw)
1088
+ assert spec.golden_path[0].host == "jumpbox"
tests/test_runtime.py CHANGED
@@ -9,6 +9,34 @@ from open_range.server.runtime import ManagedSnapshotRuntime
9
 
10
 
11
  class TestManagedSnapshotRuntime:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def test_start_preloads_snapshot_pool(self, tier1_manifest, tmp_path):
13
  runtime = ManagedSnapshotRuntime(
14
  manifest=tier1_manifest,
 
9
 
10
 
11
  class TestManagedSnapshotRuntime:
12
+ def test_offline_validator_profile_includes_static_checks(self, tier1_manifest, tmp_path):
13
+ runtime = ManagedSnapshotRuntime(
14
+ manifest=tier1_manifest,
15
+ store_dir=tmp_path / "snapshots",
16
+ validator_profile="offline",
17
+ refill_enabled=False,
18
+ )
19
+ names = [type(check).__name__ for check in runtime.validator.checks]
20
+ assert names == [
21
+ "StructuralSnapshotCheck",
22
+ "TaskFeasibilityCheck",
23
+ ]
24
+
25
+ def test_training_validator_profile_includes_live_checks(self, tier1_manifest, tmp_path):
26
+ runtime = ManagedSnapshotRuntime(
27
+ manifest=tier1_manifest,
28
+ store_dir=tmp_path / "snapshots",
29
+ validator_profile="training",
30
+ refill_enabled=False,
31
+ )
32
+ names = [type(check).__name__ for check in runtime.validator.checks]
33
+ assert "BuildBootCheck" in names
34
+ assert "ExploitabilityCheck" in names
35
+ assert "PatchabilityCheck" in names
36
+ assert "EvidenceCheck" in names
37
+ assert "RewardGroundingCheck" in names
38
+ assert "DifficultyCheck" in names
39
+
40
  def test_start_preloads_snapshot_pool(self, tier1_manifest, tmp_path):
41
  runtime = ManagedSnapshotRuntime(
42
  manifest=tier1_manifest,