Aaron Brown committed on
Commit
f016eb7
·
1 Parent(s): 1566173

Add episode CLI, synthetic data pipeline, NPC generalization, service manifest

Browse files

- Add `openrange episode` command (golden-path replay + interactive REPL)
- Add synthetic data generation pipeline with teacher agents and dataset helpers
- Generalize NPC actions/traffic scripts to derive targets from SnapshotSpec
- Add service_manifest.py for snapshot-declared service lifecycle
- Add replay_agent, dataset module, NPC reward coupling tests
- Graceful container resolution when Docker is available but no containers running
- Skip tests that depend on deleted snapshot fixtures
- Remove scripted_agent (replaced by replay_agent)

Files changed (40) hide show
  1. .gitignore +3 -0
  2. README.md +21 -0
  3. data/README.md +46 -0
  4. data/sft.jsonl +3 -0
  5. data/tool_info.md +10 -0
  6. docs/agent-protocols.md +2 -2
  7. docs/red-blue-agents.md +1 -1
  8. docs/synthetic-data.md +13 -0
  9. src/open_range/agents/__init__.py +2 -2
  10. src/open_range/agents/llm_agent.py +17 -4
  11. src/open_range/agents/parsing.py +40 -0
  12. src/open_range/agents/{scripted_agent.py → replay_agent.py} +56 -15
  13. src/open_range/agents/solvers.py +1 -1
  14. src/open_range/builder/npc/actions.py +80 -14
  15. src/open_range/builder/npc/channels.py +6 -0
  16. src/open_range/builder/npc/db_traffic.sh +47 -20
  17. src/open_range/builder/npc/http_traffic.sh +37 -41
  18. src/open_range/builder/npc/npc_agent.py +27 -10
  19. src/open_range/builder/npc/npc_manager.py +33 -9
  20. src/open_range/builder/npc/persona.py +7 -3
  21. src/open_range/builder/npc/ssh_traffic.sh +3 -3
  22. src/open_range/builder/renderer.py +56 -6
  23. src/open_range/builder/service_manifest.py +395 -0
  24. src/open_range/builder/templates/docker-compose.yml.j2 +10 -11
  25. src/open_range/cli.py +170 -2
  26. src/open_range/protocols.py +45 -1
  27. src/open_range/server/environment.py +55 -9
  28. src/open_range/server/rewards.py +2 -1
  29. src/open_range/training/__init__.py +12 -0
  30. src/open_range/training/dataset.py +170 -0
  31. src/open_range/training/synthetic.py +357 -24
  32. src/open_range/training/trajectory.py +79 -34
  33. tests/test_agents.py +3 -3
  34. tests/test_demo.py +1 -1
  35. tests/test_npc_reward_coupling.py +365 -0
  36. tests/test_parse_llm_response.py +2 -0
  37. tests/test_renderer_integration.py +4 -2
  38. tests/test_solvers.py +1 -1
  39. tests/test_synthetic.py +167 -1
  40. tests/test_trajectory.py +62 -12
.gitignore CHANGED
@@ -53,6 +53,9 @@ IMPLEMENTATION_PLAN.md
53
  .coverage
54
  htmlcov/
55
 
 
 
 
56
  # Pre-validated range pool (generated at startup)
57
  pool/
58
  snapshots/
 
53
  .coverage
54
  htmlcov/
55
 
56
+ # Synthetic data outputs
57
+ data/synthetic*.jsonl
58
+
59
  # Pre-validated range pool (generated at startup)
60
  pool/
61
  snapshots/
README.md CHANGED
@@ -66,6 +66,15 @@ uv run openrange synthetic-data \
66
  --output data/sft_red.jsonl \
67
  --roles red
68
 
 
 
 
 
 
 
 
 
 
69
  # Run the OpenEnv client against a running server
70
  uv run python examples/remote_client_demo.py --base-url http://localhost:8000
71
 
@@ -104,6 +113,18 @@ The deployed package exposes the standard OpenEnv `reset()`, `step()`, and `stat
104
  | Stealth (inversely coupled to Blue detection) | Availability (healthcheck fraction) |
105
  | Anti-hallucination (-0.3 per fake flag) | False positive penalty (-0.2 per NPC flagged) |
106
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  **Agents** — Structural protocol: any object with `reset(briefing, role)` and `act(observation) -> command` works. Ships with `LLMRangeAgent` (litellm, any provider), `ScriptedAgent`, and `HumanAgent`.
108
 
109
  **Synthetic Data** — `open_range.training.synthetic` provides snapshot-grounded trajectory generation for SFT warm-start. It uses a fast simulated `RangeEnvironment`, optional LiteLLM teacher agents, per-episode flag randomization, and exports JSONL through `TrajectoryLogger`.
 
66
  --output data/sft_red.jsonl \
67
  --roles red
68
 
69
+ # Merge local bootstrap traces and tool context into generated output
70
+ uv run openrange synthetic-data \
71
+ --manifest manifests/tier1_basic.yaml \
72
+ --output data/synthetic_sft_5.jsonl \
73
+ --num-traces 5 \
74
+ --roles red \
75
+ --bootstrap-traces data/sft.jsonl \
76
+ --tool-info data/tool_info.md
77
+
78
  # Run the OpenEnv client against a running server
79
  uv run python examples/remote_client_demo.py --base-url http://localhost:8000
80
 
 
113
  | Stealth (inversely coupled to Blue detection) | Availability (healthcheck fraction) |
114
  | Anti-hallucination (-0.3 per fake flag) | False positive penalty (-0.2 per NPC flagged) |
115
 
116
+ **NPC Traffic** — Background noise and social engineering surface. Two levels:
117
+
118
+ - **Level 0** (shell scripts): `http_traffic.sh`, `db_traffic.sh`, `ssh_traffic.sh` generate benign traffic that Blue must filter from real attacks. Scripts discover targets dynamically (available pages, databases, tables) — no hardcoded endpoints.
119
+ - **Level 1** (LLM agents): Each NPC persona runs an autonomous workday via LiteLLM — browsing pages, sending emails, querying databases, accessing file shares. NPCs also react to incoming stimuli (phishing emails) based on their `security_awareness` profile.
120
+
121
+ All NPC actions are derived from the `SnapshotSpec` at runtime (pages, shares, tables, credentials, domain), so they generalize to any Builder-generated environment. NPC logs carry structured fields (`type`, `label`, `source`, `result`) that couple directly to Red/Blue reward signals.
122
+
123
+ Configure the NPC model via environment variable:
124
+ ```bash
125
+ export OPENRANGE_NPC_MODEL="azure/gpt-5.2-codex" # or openai/gpt-4o, anthropic/claude-haiku-4-5-20251001, ollama/llama3
126
+ ```
127
+
128
  **Agents** — Structural protocol: any object with `reset(briefing, role)` and `act(observation) -> command` works. Ships with `LLMRangeAgent` (litellm, any provider), `ScriptedAgent`, and `HumanAgent`.
129
 
130
  **Synthetic Data** — `open_range.training.synthetic` provides snapshot-grounded trajectory generation for SFT warm-start. It uses a fast simulated `RangeEnvironment`, optional LiteLLM teacher agents, per-episode flag randomization, and exports JSONL through `TrajectoryLogger`.
data/README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training Data
2
+
3
+ Seed and generated datasets for SFT warm-start live under `data/`.
4
+
5
+ ## Files
6
+
7
+ | File | Purpose |
8
+ |------|---------|
9
+ | `sft.jsonl` | Seed SFT dataset in ChatML format, including assistant tool calls and tool responses. |
10
+ | `tool_info.md` | Reusable tool catalog that can be injected into generated system prompts with `--tool-info`. |
11
+ | `synthetic*.jsonl` | Generated synthetic datasets from `openrange synthetic-data` (gitignored). |
12
+
13
+ ## Seed SFT Format
14
+
15
+ Each line in `sft.jsonl` is a single solved trajectory:
16
+
17
+ ```json
18
+ {
19
+ "messages": [
20
+ {"role": "system", "content": "..."},
21
+ {"role": "user", "content": "..."},
22
+ {"role": "assistant", "content": "...", "tool_calls": [...]},
23
+ {"role": "tool", "tool_call_id": "...", "name": "shell_command", "content": "..."}
24
+ ],
25
+ "metadata": {"source": "bootstrap", "success": true},
26
+ "ground_truth_flag": "FLAG{...}",
27
+ "optimal_steps": 8
28
+ }
29
+ ```
30
+
31
+ ## Generating Synthetic Data
32
+
33
+ Use the seed file as bootstrap context and merge newly generated OpenRange traces into a single output:
34
+
35
+ ```bash
36
+ uv run --extra synthetic openrange synthetic-data \
37
+ --manifest manifests/tier1_basic.yaml \
38
+ --output data/synthetic_sft_5.jsonl \
39
+ --num-traces 5 \
40
+ --roles red \
41
+ --teacher-model azure/gpt-5.2-codex \
42
+ --bootstrap-traces data/sft.jsonl \
43
+ --tool-info data/tool_info.md
44
+ ```
45
+
46
+ The output file keeps the imported bootstrap records intact and appends the generated OpenRange records after them.
data/sft.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c6f43a70274ba9f193eeb73b349a55970a99d280a19dae455f54708cf32841
3
+ size 14915149
data/tool_info.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ - `shell_command`: run one-shot shell commands and capture stdout/stderr
2
+ - `exec_command`: start an interactive PTY process
3
+ - `write_stdin`: send follow-up input to a running PTY session
4
+ - `python_code`: execute Python for local analysis or decoding
5
+ - `read_file`: inspect source or challenge files directly
6
+ - `grep`: search for indicators, flags, or suspicious patterns in files
7
+ - `file_search`: locate files by name or path fragment
8
+ - `flag_found`: submit a recovered flag immediately
9
+ - `web_search`: look up external references when the environment allows it
10
+ - `apply_patch`: edit local files when the task requires code changes
docs/agent-protocols.md CHANGED
@@ -117,7 +117,7 @@ class RangeAgent(Protocol):
117
  | Implementation | File | When to use | LLM? |
118
  |----------------|------|------------|------|
119
  | `LLMRangeAgent` | `src/open_range/agents/llm_agent.py` | Production — model-agnostic via LiteLLM | Yes (LiteLLM) |
120
- | `ScriptedAgent` | `src/open_range/agents/scripted_agent.py` | Testing/CI/demos — replays fixed command list | No |
121
  | `HumanAgent` | `src/open_range/agents/human_agent.py` | Manual play/debugging — stdin/stdout | No |
122
 
123
  ```python
@@ -162,7 +162,7 @@ class HumanAgent:
162
  def act(self, observation: str) -> str: ...
163
  ```
164
 
165
- Pre-built demo agents are also available as `ScriptedRedAgent` and `ScriptedBlueAgent` in `src/open_range/agents/scripted_agent.py`.
166
 
167
  ### Builder
168
 
 
117
  | Implementation | File | When to use | LLM? |
118
  |----------------|------|------------|------|
119
  | `LLMRangeAgent` | `src/open_range/agents/llm_agent.py` | Production — model-agnostic via LiteLLM | Yes (LiteLLM) |
120
+ | `ScriptedAgent` | `src/open_range/agents/replay_agent.py` | Testing/CI/demos — replays fixed command list | No |
121
  | `HumanAgent` | `src/open_range/agents/human_agent.py` | Manual play/debugging — stdin/stdout | No |
122
 
123
  ```python
 
162
  def act(self, observation: str) -> str: ...
163
  ```
164
 
165
+ Pre-built demo agents are also available as `ScriptedRedAgent` and `ScriptedBlueAgent` in `src/open_range/agents/replay_agent.py`.
166
 
167
  ### Builder
168
 
docs/red-blue-agents.md CHANGED
@@ -688,7 +688,7 @@ agents/
688
  ├── __init__.py # Public API (re-exports all key symbols)
689
  ├── protocol.py # RangeAgent protocol + EpisodeResult + EpisodeMetrics dataclasses
690
  ├── llm_agent.py # LLMRangeAgent (LiteLLM -- any model)
691
- ├── scripted_agent.py # ScriptedAgent, ScriptedRedAgent, ScriptedBlueAgent (demo/test)
692
  ├── human_agent.py # HumanAgent (interactive terminal)
693
  ├── prompts.py # RED_SYSTEM_PROMPT, BLUE_SYSTEM_PROMPT
694
  ├── parsing.py # extract_command() -- pull command from LLM text
 
688
  ├── __init__.py # Public API (re-exports all key symbols)
689
  ├── protocol.py # RangeAgent protocol + EpisodeResult + EpisodeMetrics dataclasses
690
  ├── llm_agent.py # LLMRangeAgent (LiteLLM -- any model)
691
+ ├── replay_agent.py # ScriptedAgent, ScriptedRedAgent, ScriptedBlueAgent (demo/test)
692
  ├── human_agent.py # HumanAgent (interactive terminal)
693
  ├── prompts.py # RED_SYSTEM_PROMPT, BLUE_SYSTEM_PROMPT
694
  ├── parsing.py # extract_command() -- pull command from LLM text
docs/synthetic-data.md CHANGED
@@ -55,6 +55,19 @@ uv run openrange synthetic-data \
55
  --roles red
56
  ```
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  Generate traces from a manifest using the deterministic builder:
59
 
60
  ```bash
 
55
  --roles red
56
  ```
57
 
58
+ Merge previously collected bootstrap traces and append a reusable tool catalog to generated system prompts:
59
+
60
+ ```bash
61
+ uv run openrange synthetic-data \
62
+ --manifest manifests/tier1_basic.yaml \
63
+ --output data/synthetic_sft_5.jsonl \
64
+ --num-traces 5 \
65
+ --roles red \
66
+ --teacher-model azure/gpt-5.2-codex \
67
+ --bootstrap-traces data/sft.jsonl \
68
+ --tool-info data/tool_info.md
69
+ ```
70
+
71
  Generate traces from a manifest using the deterministic builder:
72
 
73
  ```bash
src/open_range/agents/__init__.py CHANGED
@@ -4,7 +4,7 @@ Exports:
4
  - RangeAgent: Protocol for any compatible agent
5
  - EpisodeResult, EpisodeMetrics: Trajectory/metrics dataclasses
6
  - LLMRangeAgent: LiteLLM-powered agent (any model)
7
- - ScriptedAgent, ScriptedRedAgent, ScriptedBlueAgent: Fixed-sequence agents
8
  - HumanAgent: Interactive stdin/stdout agent
9
  - run_episode: Orchestration loop
10
  - evaluate: Multi-episode evaluation harness
@@ -13,7 +13,7 @@ Exports:
13
 
14
  from open_range.agents.protocol import EpisodeMetrics, EpisodeResult, RangeAgent
15
  from open_range.agents.parsing import extract_command
16
- from open_range.agents.scripted_agent import (
17
  ScriptedAgent,
18
  ScriptedBlueAgent,
19
  ScriptedRedAgent,
 
4
  - RangeAgent: Protocol for any compatible agent
5
  - EpisodeResult, EpisodeMetrics: Trajectory/metrics dataclasses
6
  - LLMRangeAgent: LiteLLM-powered agent (any model)
7
+ - ScriptedAgent, ScriptedRedAgent, ScriptedBlueAgent: Fixed-sequence replay agents
8
  - HumanAgent: Interactive stdin/stdout agent
9
  - run_episode: Orchestration loop
10
  - evaluate: Multi-episode evaluation harness
 
13
 
14
  from open_range.agents.protocol import EpisodeMetrics, EpisodeResult, RangeAgent
15
  from open_range.agents.parsing import extract_command
16
+ from open_range.agents.replay_agent import (
17
  ScriptedAgent,
18
  ScriptedBlueAgent,
19
  ScriptedRedAgent,
src/open_range/agents/llm_agent.py CHANGED
@@ -9,6 +9,7 @@ Works with any LiteLLM-supported provider:
9
 
10
  from __future__ import annotations
11
 
 
12
  from typing import Any, Literal
13
 
14
  from open_range.agents.observation import format_observation
@@ -34,23 +35,34 @@ class LLMRangeAgent:
34
  model: str = "anthropic/claude-sonnet-4-20250514",
35
  temperature: float | None = 0.3,
36
  max_tokens: int = 512,
 
 
37
  **litellm_kwargs: Any,
38
  ) -> None:
39
  self.model = model
40
  self.temperature = temperature
41
  self.max_tokens = max_tokens
 
 
42
  self.litellm_kwargs = litellm_kwargs
43
- self.messages: list[dict[str, str]] = []
44
  self.role: str = "red"
 
 
45
 
46
  def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
47
  """Initialize conversation history with role-specific system prompt."""
48
  self.role = role
49
  system = RED_SYSTEM_PROMPT if role == "red" else BLUE_SYSTEM_PROMPT
 
 
50
  self.messages = [
51
  {"role": "system", "content": system},
52
- {"role": "user", "content": briefing},
53
  ]
 
 
 
 
54
 
55
  def act(self, observation: Any) -> str:
56
  """Call the LLM with the conversation history and return a command.
@@ -81,5 +93,6 @@ class LLMRangeAgent:
81
  response = litellm.completion(**kwargs)
82
  text = response.choices[0].message.content.strip()
83
  self.messages.append({"role": "assistant", "content": text})
84
-
85
- return extract_command(text)
 
 
9
 
10
  from __future__ import annotations
11
 
12
+ import copy
13
  from typing import Any, Literal
14
 
15
  from open_range.agents.observation import format_observation
 
35
  model: str = "anthropic/claude-sonnet-4-20250514",
36
  temperature: float | None = 0.3,
37
  max_tokens: int = 512,
38
+ bootstrap_messages: list[dict[str, Any]] | None = None,
39
+ system_suffix: str = "",
40
  **litellm_kwargs: Any,
41
  ) -> None:
42
  self.model = model
43
  self.temperature = temperature
44
  self.max_tokens = max_tokens
45
+ self.bootstrap_messages = copy.deepcopy(bootstrap_messages or [])
46
+ self.system_suffix = system_suffix.strip()
47
  self.litellm_kwargs = litellm_kwargs
48
+ self.messages: list[dict[str, Any]] = []
49
  self.role: str = "red"
50
+ self.last_response_text: str = ""
51
+ self.last_command: str = ""
52
 
53
  def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
54
  """Initialize conversation history with role-specific system prompt."""
55
  self.role = role
56
  system = RED_SYSTEM_PROMPT if role == "red" else BLUE_SYSTEM_PROMPT
57
+ if self.system_suffix:
58
+ system = f"{system}\n\n{self.system_suffix}"
59
  self.messages = [
60
  {"role": "system", "content": system},
 
61
  ]
62
+ self.messages.extend(copy.deepcopy(self.bootstrap_messages))
63
+ self.messages.append({"role": "user", "content": briefing})
64
+ self.last_response_text = ""
65
+ self.last_command = ""
66
 
67
  def act(self, observation: Any) -> str:
68
  """Call the LLM with the conversation history and return a command.
 
93
  response = litellm.completion(**kwargs)
94
  text = response.choices[0].message.content.strip()
95
  self.messages.append({"role": "assistant", "content": text})
96
+ self.last_response_text = text
97
+ self.last_command = extract_command(text)
98
+ return self.last_command
src/open_range/agents/parsing.py CHANGED
@@ -83,3 +83,43 @@ def extract_command(text: str) -> str:
83
  return first
84
 
85
  return stripped
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  return first
84
 
85
  return stripped
86
+
87
+
88
+ def strip_command_from_response(text: str, command: str) -> str:
89
+ """Remove the extracted command from an LLM response, preserving reasoning.
90
+
91
+ This is best-effort. It handles the response patterns encouraged by the
92
+ synthetic-data prompts:
93
+ - fenced code blocks
94
+ - ``Command: ...`` lines
95
+ - a trailing bare command line
96
+ """
97
+ if not text:
98
+ return ""
99
+
100
+ stripped = text.strip()
101
+ if not command:
102
+ return stripped
103
+
104
+ command_pattern = re.escape(command.strip())
105
+
106
+ # Remove fenced blocks that only contain the command.
107
+ stripped = re.sub(
108
+ rf"```(?:bash|sh|shell|zsh)?\s*\n\s*{command_pattern}\s*```",
109
+ "",
110
+ stripped,
111
+ flags=re.IGNORECASE | re.DOTALL,
112
+ ).strip()
113
+
114
+ # Remove explicit "Command:" lines.
115
+ stripped = re.sub(
116
+ rf"(?im)^\s*(?:command|run|execute|cmd)\s*:\s*{command_pattern}\s*$",
117
+ "",
118
+ stripped,
119
+ ).strip()
120
+
121
+ # Remove a trailing bare command line.
122
+ lines = stripped.splitlines()
123
+ if lines and lines[-1].strip().strip("`") == command.strip():
124
+ lines = lines[:-1]
125
+ return "\n".join(lines).strip()
src/open_range/agents/{scripted_agent.py → replay_agent.py} RENAMED
@@ -1,22 +1,18 @@
1
- """Scripted agents for testing and demos.
2
 
3
- No LLM required -- these agents replay a fixed list of commands.
4
- Useful for integration tests, golden-path verification, and hackathon demos.
 
5
  """
6
 
7
  from __future__ import annotations
8
 
 
9
  from typing import Any, Literal
10
 
11
 
12
  class ScriptedAgent:
13
- """Replays a fixed list of commands in order.
14
-
15
- After the list is exhausted, repeats the last command (or a configurable
16
- fallback) so the episode can terminate normally.
17
-
18
- Satisfies the :class:`RangeAgent` protocol.
19
- """
20
 
21
  def __init__(
22
  self,
@@ -27,24 +23,34 @@ class ScriptedAgent:
27
  self.fallback = fallback
28
  self._step_idx = 0
29
  self.role: str = "red"
 
 
30
 
31
  def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
32
  """Reset the step counter for a new episode."""
 
33
  self._step_idx = 0
34
  self.role = role
 
 
35
 
36
  def act(self, observation: Any) -> str:
37
  """Return the next scripted command."""
 
38
  if self._step_idx < len(self.commands):
39
  cmd = self.commands[self._step_idx]
40
  self._step_idx += 1
41
- return cmd
42
- return self.fallback
 
 
 
 
43
 
 
 
 
44
 
45
- # ---------------------------------------------------------------------------
46
- # Pre-built demo agents
47
- # ---------------------------------------------------------------------------
48
 
49
  DEMO_RED_SCRIPT = [
50
  "nmap -sV 10.0.1.0/24",
@@ -76,3 +82,38 @@ class ScriptedBlueAgent(ScriptedAgent):
76
 
77
  def __init__(self) -> None:
78
  super().__init__(commands=DEMO_BLUE_SCRIPT, fallback="check_services")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deterministic replay agents for testing, baselines, and demos.
2
 
3
+ No LLM is required. These agents replay a fixed list of commands and provide
4
+ lightweight reasoning text so synthetic trajectory export can still emit
5
+ tool-style transcripts.
6
  """
7
 
8
  from __future__ import annotations
9
 
10
+ import shlex
11
  from typing import Any, Literal
12
 
13
 
14
  class ScriptedAgent:
15
+ """Replays a fixed list of commands in order."""
 
 
 
 
 
 
16
 
17
  def __init__(
18
  self,
 
23
  self.fallback = fallback
24
  self._step_idx = 0
25
  self.role: str = "red"
26
+ self.last_response_text: str = ""
27
+ self.last_command: str = ""
28
 
29
  def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
30
  """Reset the step counter for a new episode."""
31
+ del briefing
32
  self._step_idx = 0
33
  self.role = role
34
+ self.last_response_text = ""
35
+ self.last_command = ""
36
 
37
  def act(self, observation: Any) -> str:
38
  """Return the next scripted command."""
39
+ del observation
40
  if self._step_idx < len(self.commands):
41
  cmd = self.commands[self._step_idx]
42
  self._step_idx += 1
43
+ else:
44
+ cmd = self.fallback
45
+
46
+ self.last_command = cmd
47
+ self.last_response_text = self._render_response(cmd)
48
+ return cmd
49
 
50
+ def _render_response(self, command: str) -> str:
51
+ thought = _default_reasoning(command, role=self.role)
52
+ return f"<think>\n{thought}\n</think>\nCommand: {command}"
53
 
 
 
 
54
 
55
  DEMO_RED_SCRIPT = [
56
  "nmap -sV 10.0.1.0/24",
 
82
 
83
  def __init__(self) -> None:
84
  super().__init__(commands=DEMO_BLUE_SCRIPT, fallback="check_services")
85
+
86
+
87
+ def _default_reasoning(command: str, *, role: str) -> str:
88
+ lowered = command.lower()
89
+ try:
90
+ parts = shlex.split(command)
91
+ except ValueError:
92
+ parts = command.split()
93
+
94
+ first_path = next((part for part in parts[1:] if "/" in part and not part.startswith("http")), "")
95
+ if "nmap" in lowered:
96
+ return "I need a quick service inventory before probing any likely attack paths."
97
+ if "curl" in lowered and ("union" in lowered or "select" in lowered):
98
+ return "The search endpoint is a good candidate for SQL injection, so I will test a UNION-style payload."
99
+ if "curl" in lowered:
100
+ return "I should inspect the exposed web surface to identify routes, parameters, and authentication flows."
101
+ if "mysql" in lowered:
102
+ return "I appear to have database access, so I will enumerate data stores and look for the flag-bearing table."
103
+ if lowered.startswith("cat ") and first_path:
104
+ return f"I need to inspect {first_path} directly for credentials, source code, or other embedded clues."
105
+ if lowered.startswith("grep "):
106
+ if role == "blue":
107
+ return "I need to filter the SIEM logs for indicators that confirm the suspected attack path."
108
+ return "I should search the available files for indicators, credentials, or flag material."
109
+ if lowered.startswith("find "):
110
+ return "I need a broader file inventory before I decide which artifact to inspect next."
111
+ if lowered.startswith("submit_flag "):
112
+ return "The recovered token looks promising, so I will submit it for validation now."
113
+ if lowered.startswith("submit_finding "):
114
+ return "The observed activity is strong enough to report as a concrete finding."
115
+ if lowered.startswith("patch "):
116
+ return "I have enough evidence to apply a targeted remediation for the vulnerable path."
117
+ if "check_services" in lowered:
118
+ return "Before changing anything else, I should confirm the core services are still healthy."
119
+ return "I will take the next concrete step that reduces uncertainty and moves the objective forward."
src/open_range/agents/solvers.py CHANGED
@@ -18,7 +18,7 @@ from __future__ import annotations
18
 
19
  from typing import Literal
20
 
21
- from open_range.agents.scripted_agent import ScriptedAgent
22
 
23
 
24
  # =====================================================================
 
18
 
19
  from typing import Literal
20
 
21
+ from open_range.agents.replay_agent import ScriptedAgent
22
 
23
 
24
  # =====================================================================
src/open_range/builder/npc/actions.py CHANGED
@@ -20,8 +20,9 @@ logger = logging.getLogger(__name__)
20
  class NPCActionExecutor:
21
  """Execute NPC actions inside Docker containers.
22
 
23
- At init, extracts available pages, shares, DB tables, and users from
24
- the snapshot so every action targets real resources in this environment.
 
25
  """
26
 
27
  def __init__(self, containers: ContainerSet, snapshot: SnapshotSpec) -> None:
@@ -32,6 +33,8 @@ class NPCActionExecutor:
32
  self._db_tables = _extract_db_tables(snapshot)
33
  self._users = _extract_users(snapshot)
34
  self._domain = snapshot.topology.get("domain", "corp.local")
 
 
35
 
36
  # ------------------------------------------------------------------
37
  # Routine actions (autonomous workday)
@@ -146,9 +149,11 @@ class NPCActionExecutor:
146
  query = f"SELECT * FROM {table} LIMIT 5"
147
  else:
148
  query = "SHOW TABLES"
 
 
149
  await self.containers.exec(
150
  "db",
151
- f'mysql -u app_user -p\'AppUs3r!2024\' -e "{query}" 2>/dev/null || true',
152
  )
153
  return _log(persona, "query_db", detail or f"Queried {target or 'database'}", "db:query_log")
154
 
@@ -184,7 +189,7 @@ class NPCActionExecutor:
184
  "web",
185
  f'curl -s -o /dev/null -A "Mozilla/5.0 ({username})" "{url}"',
186
  )
187
- return _log(persona, "click_link", f"Clicked: {url}", "web:access_log")
188
 
189
  async def _react_email(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
190
  username = _username_from_persona(persona)
@@ -196,7 +201,7 @@ class NPCActionExecutor:
196
  f"&& echo 'From: {username}@{self._domain}\\nSubject: Re\\n\\n{body}' "
197
  f"> /var/mail/{username}/sent_{ts_i}.eml",
198
  )
199
- return _log(persona, action.action, "Replied to message", "mail:spool")
200
 
201
  async def _react_share_creds(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
202
  username = _username_from_persona(persona)
@@ -216,7 +221,7 @@ class NPCActionExecutor:
216
  f'echo "[$(date)] CRED-LEAK: {persona.name} shared credentials" '
217
  f">> /var/log/siem/consolidated/all.log",
218
  )
219
- return _log(persona, "share_credentials", f"{persona.name} leaked credentials", "web+siem")
220
 
221
  async def _react_report(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
222
  detail = "; ".join(action.side_effects) if action.side_effects else "suspicious activity"
@@ -225,10 +230,10 @@ class NPCActionExecutor:
225
  f'echo "[$(date)] NPC-REPORT: {persona.name}: {detail}" '
226
  f">> /var/log/siem/consolidated/all.log",
227
  )
228
- return _log(persona, "report_to_IT", detail, "siem:alert")
229
 
230
  async def _react_ignore(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
231
- return _log(persona, "ignore", "Ignored stimulus", "none")
232
 
233
 
234
  # ---------------------------------------------------------------------------
@@ -237,17 +242,22 @@ class NPCActionExecutor:
237
 
238
 
239
  def _extract_web_pages(snapshot: SnapshotSpec) -> list[str]:
240
- """Extract URL paths from snapshot files dict (web:*.php -> /path)."""
 
 
 
 
241
  pages: list[str] = []
242
  for key in snapshot.files:
243
  if not key.startswith("web:"):
244
  continue
245
  path = key.split(":", 1)[1]
246
- # Convert filesystem path to URL path
247
- if "/var/www/" in path and path.endswith(".php"):
248
- url_path = path.replace("/var/www/portal", "").replace("/var/www/html", "")
249
- if url_path:
250
- pages.append(url_path)
 
251
  return pages or ["/"]
252
 
253
 
@@ -287,6 +297,38 @@ def _extract_users(snapshot: SnapshotSpec) -> list[str]:
287
  return [u["username"] for u in users if isinstance(u, dict) and "username" in u]
288
 
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  def _username_from_persona(persona: NPCPersona) -> str:
291
  email = persona.accounts.get("email", "")
292
  if "@" in email:
@@ -295,12 +337,36 @@ def _username_from_persona(persona: NPCPersona) -> str:
295
 
296
 
297
  def _log(persona: NPCPersona, action: str, detail: str, source: str) -> dict[str, Any]:
 
298
  return {
299
  "timestamp": time.time(),
300
  "type": f"npc_{action}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  "persona": persona.name,
302
  "department": persona.department,
303
  "action": action,
304
  "detail": detail,
305
  "source": source,
 
306
  }
 
20
  class NPCActionExecutor:
21
  """Execute NPC actions inside Docker containers.
22
 
23
+ At init, extracts available pages, shares, DB tables, users, and
24
+ credentials from the snapshot so every action targets real resources
25
+ in this environment.
26
  """
27
 
28
  def __init__(self, containers: ContainerSet, snapshot: SnapshotSpec) -> None:
 
33
  self._db_tables = _extract_db_tables(snapshot)
34
  self._users = _extract_users(snapshot)
35
  self._domain = snapshot.topology.get("domain", "corp.local")
36
+ self._db_creds = _extract_db_credentials(snapshot)
37
+ self._ssh_creds = _extract_ssh_credentials(snapshot)
38
 
39
  # ------------------------------------------------------------------
40
  # Routine actions (autonomous workday)
 
149
  query = f"SELECT * FROM {table} LIMIT 5"
150
  else:
151
  query = "SHOW TABLES"
152
+ db_user, db_pass = self._db_creds
153
+ cred_flag = f"-u {db_user} -p'{db_pass}'" if db_pass else f"-u {db_user}"
154
  await self.containers.exec(
155
  "db",
156
+ f'mysql {cred_flag} -e "{query}" 2>/dev/null || true',
157
  )
158
  return _log(persona, "query_db", detail or f"Queried {target or 'database'}", "db:query_log")
159
 
 
189
  "web",
190
  f'curl -s -o /dev/null -A "Mozilla/5.0 ({username})" "{url}"',
191
  )
192
+ return _se_log(persona, "click_link", f"Clicked: {url}", "web:access_log", result="success")
193
 
194
  async def _react_email(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
195
  username = _username_from_persona(persona)
 
201
  f"&& echo 'From: {username}@{self._domain}\\nSubject: Re\\n\\n{body}' "
202
  f"> /var/mail/{username}/sent_{ts_i}.eml",
203
  )
204
+ return _se_log(persona, action.action, "Replied to message", "mail:spool", result="success")
205
 
206
  async def _react_share_creds(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
207
  username = _username_from_persona(persona)
 
221
  f'echo "[$(date)] CRED-LEAK: {persona.name} shared credentials" '
222
  f">> /var/log/siem/consolidated/all.log",
223
  )
224
+ return _se_log(persona, "share_credentials", f"{persona.name} leaked credentials", "web+siem", result="success")
225
 
226
  async def _react_report(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
227
  detail = "; ".join(action.side_effects) if action.side_effects else "suspicious activity"
 
230
  f'echo "[$(date)] NPC-REPORT: {persona.name}: {detail}" '
231
  f">> /var/log/siem/consolidated/all.log",
232
  )
233
+ return _se_log(persona, "report_to_IT", detail, "siem:alert", result="blocked")
234
 
235
  async def _react_ignore(self, persona: NPCPersona, action: NPCAction) -> dict[str, Any]:
236
+ return _se_log(persona, "ignore", "Ignored stimulus", "none", result="blocked")
237
 
238
 
239
  # ---------------------------------------------------------------------------
 
242
 
243
 
244
  def _extract_web_pages(snapshot: SnapshotSpec) -> list[str]:
245
+ """Extract URL paths from snapshot files dict (web:*.php -> /path).
246
+
247
+ Handles arbitrary doc roots by stripping any ``/var/www/<app>/`` prefix
248
+ to produce URL paths.
249
+ """
250
  pages: list[str] = []
251
  for key in snapshot.files:
252
  if not key.startswith("web:"):
253
  continue
254
  path = key.split(":", 1)[1]
255
+ if not path.endswith((".php", ".html", ".htm")):
256
+ continue
257
+ # Strip doc root: /var/www/<anything>/ -> /
258
+ url_path = re.sub(r"^/var/www/[^/]+", "", path)
259
+ if url_path:
260
+ pages.append(url_path)
261
  return pages or ["/"]
262
 
263
 
 
297
  return [u["username"] for u in users if isinstance(u, dict) and "username" in u]
298
 
299
 
300
+ def _extract_db_credentials(snapshot: SnapshotSpec) -> tuple[str, str]:
301
+ """Extract DB credentials from topology users. Fallback to defaults."""
302
+ users = snapshot.topology.get("users", [])
303
+ for user in users:
304
+ if not isinstance(user, dict):
305
+ continue
306
+ hosts = user.get("hosts", [])
307
+ if "db" in hosts:
308
+ return user.get("username", "app_user"), user.get("password", "")
309
+ return "app_user", "AppUs3r!2024"
310
+
311
+
312
+ def _extract_ssh_credentials(snapshot: SnapshotSpec) -> tuple[str, str]:
313
+ """Extract SSH admin credentials from topology users. Fallback to defaults."""
314
+ users = snapshot.topology.get("users", [])
315
+ # First pass: look for explicit admin roles
316
+ for user in users:
317
+ if not isinstance(user, dict):
318
+ continue
319
+ role = user.get("role", "")
320
+ if role in ("admin", "sysadmin", "root"):
321
+ return user.get("username", "admin"), user.get("password", "")
322
+ # Second pass: look for users on SSH-accessible hosts
323
+ for user in users:
324
+ if not isinstance(user, dict):
325
+ continue
326
+ hosts = user.get("hosts", [])
327
+ if any(h in hosts for h in ("web", "files", "ldap", "siem")):
328
+ return user.get("username", "admin"), user.get("password", "")
329
+ return "admin", "Adm1n!2024"
330
+
331
+
332
  def _username_from_persona(persona: NPCPersona) -> str:
333
  email = persona.accounts.get("email", "")
334
  if "@" in email:
 
337
 
338
 
339
  def _log(persona: NPCPersona, action: str, detail: str, source: str) -> dict[str, Any]:
340
+ """Log a routine (benign) NPC action."""
341
  return {
342
  "timestamp": time.time(),
343
  "type": f"npc_{action}",
344
+ "label": "benign",
345
+ "persona": persona.name,
346
+ "department": persona.department,
347
+ "action": action,
348
+ "detail": detail,
349
+ "source": source,
350
+ }
351
+
352
+
353
+ def _se_log(
354
+ persona: NPCPersona,
355
+ action: str,
356
+ detail: str,
357
+ source: str,
358
+ *,
359
+ result: str = "unknown",
360
+ ) -> dict[str, Any]:
361
+ """Log a social-engineering reactive NPC action for reward coupling."""
362
+ return {
363
+ "timestamp": time.time(),
364
+ "type": "social_engineering",
365
+ "label": "reactive",
366
  "persona": persona.name,
367
  "department": persona.department,
368
  "action": action,
369
  "detail": detail,
370
  "source": source,
371
+ "result": result,
372
  }
src/open_range/builder/npc/channels.py CHANGED
@@ -76,11 +76,13 @@ class ChatChannel:
76
  return [
77
  {
78
  "type": "chat",
 
79
  "sender": m.sender,
80
  "recipient": m.recipient,
81
  "content": m.content,
82
  "timestamp": m.timestamp,
83
  "channel": m.channel,
 
84
  }
85
  for m in self._messages
86
  ]
@@ -183,6 +185,7 @@ class VoiceChannel:
183
  return [
184
  {
185
  "type": "voice",
 
186
  "caller": c.caller,
187
  "callee": c.callee,
188
  "pretext": c.pretext,
@@ -190,6 +193,7 @@ class VoiceChannel:
190
  "transcript": c.transcript,
191
  "timestamp": c.timestamp,
192
  "duration_s": c.duration_s,
 
193
  }
194
  for c in self._calls
195
  ]
@@ -271,6 +275,7 @@ class DocumentChannel:
271
  return [
272
  {
273
  "type": "document",
 
274
  "sender": d.sender,
275
  "recipient": d.recipient,
276
  "filename": d.filename,
@@ -278,6 +283,7 @@ class DocumentChannel:
278
  "timestamp": d.timestamp,
279
  "accessed": d.accessed,
280
  "access_decision": d.access_decision,
 
281
  }
282
  for d in self._documents
283
  ]
 
76
  return [
77
  {
78
  "type": "chat",
79
+ "label": "benign",
80
  "sender": m.sender,
81
  "recipient": m.recipient,
82
  "content": m.content,
83
  "timestamp": m.timestamp,
84
  "channel": m.channel,
85
+ "source": f"chat:{m.channel}",
86
  }
87
  for m in self._messages
88
  ]
 
185
  return [
186
  {
187
  "type": "voice",
188
+ "label": "benign",
189
  "caller": c.caller,
190
  "callee": c.callee,
191
  "pretext": c.pretext,
 
193
  "transcript": c.transcript,
194
  "timestamp": c.timestamp,
195
  "duration_s": c.duration_s,
196
+ "source": "voice:phone",
197
  }
198
  for c in self._calls
199
  ]
 
275
  return [
276
  {
277
  "type": "document",
278
+ "label": "benign",
279
  "sender": d.sender,
280
  "recipient": d.recipient,
281
  "filename": d.filename,
 
283
  "timestamp": d.timestamp,
284
  "accessed": d.accessed,
285
  "access_decision": d.access_decision,
286
+ "source": f"document:{d.filename}",
287
  }
288
  for d in self._documents
289
  ]
src/open_range/builder/npc/db_traffic.sh CHANGED
@@ -1,8 +1,8 @@
1
  #!/usr/bin/env bash
2
  # Level 0 NPC: Database traffic generator (mysql query loop)
3
  #
4
- # Simulates application database queries -- SELECT, INSERT operations
5
- # that a normal web application backend would generate.
6
  #
7
  # Environment variables:
8
  # DB_HOST - hostname of the database server (default: db)
@@ -15,29 +15,56 @@ RATE_LAMBDA="${RATE_LAMBDA:-20}"
15
 
16
  INTERVAL=$(awk "BEGIN {printf \"%.1f\", 60.0 / $RATE_LAMBDA}")
17
 
18
- DB_NAME="referral_db"
 
 
19
 
20
- # Application-level queries that a normal app would run
21
- QUERIES=(
22
- "SELECT id, first_name, last_name FROM ${DB_NAME}.patients LIMIT 5"
23
- "SELECT id, status, specialist FROM ${DB_NAME}.patient_referrals ORDER BY created_at DESC LIMIT 3"
24
- "SELECT COUNT(*) FROM ${DB_NAME}.patient_referrals WHERE status='Pending'"
25
- "SELECT id, amount_due, status FROM ${DB_NAME}.billing WHERE status='Open'"
26
- "SELECT username, role, department FROM ${DB_NAME}.users LIMIT 10"
27
- "UPDATE ${DB_NAME}.billing SET last_updated=CURDATE() WHERE id=5001"
28
- "SELECT p.first_name, p.last_name, r.status FROM ${DB_NAME}.patients p JOIN ${DB_NAME}.patient_referrals r ON p.id=r.patient_id LIMIT 5"
29
- "INSERT INTO ${DB_NAME}.access_log (user_id, action, ip) VALUES (3, 'view_referrals', '10.0.1.10')"
30
- )
31
 
32
- # App database credentials (non-privileged)
33
- DB_USER="app_user"
34
- DB_PASS="AppUs3r!2024"
 
 
 
 
35
 
36
- echo "[NPC-DB] Starting DB traffic to ${DB_HOST} at ${RATE_LAMBDA} queries/min"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  while true; do
39
- IDX=$(( RANDOM % ${#QUERIES[@]} ))
40
- QUERY="${QUERIES[$IDX]}"
 
 
 
 
 
 
 
41
 
42
  mysql -h "${DB_HOST}" \
43
  -u "${DB_USER}" \
 
1
  #!/usr/bin/env bash
2
  # Level 0 NPC: Database traffic generator (mysql query loop)
3
  #
4
+ # Simulates application database queries. Discovers available databases
5
+ # and tables dynamically so it adapts to any LLM-generated environment.
6
  #
7
  # Environment variables:
8
  # DB_HOST - hostname of the database server (default: db)
 
15
 
16
  INTERVAL=$(awk "BEGIN {printf \"%.1f\", 60.0 / $RATE_LAMBDA}")
17
 
18
# App database credentials -- resolved from env or defaults.
# NOTE(review): NPCManager appears to inject DB_USER/DB_PASS from the
# topology; these literals are only the last-resort fallback.
DB_USER="${DB_USER:-app_user}"
DB_PASS="${DB_PASS:-AppUs3r!2024}"

# Discover databases (skip system DBs).
# -N suppresses the column-header row so each output line is a bare DB
# name; system schemas are filtered and only the first remaining
# application database is used.
discover_db() {
    mysql -h "${DB_HOST}" -u "${DB_USER}" -p"${DB_PASS}" \
        -N -e "SHOW DATABASES" 2>/dev/null \
        | grep -v -E '^(information_schema|mysql|performance_schema|sys)$' \
        | head -1
}
 
 
 
 
29
 
30
# Discover tables in a database.
# Output is capped at 10 tables to bound the query-rotation set.
discover_tables() {
    local db="$1"
    mysql -h "${DB_HOST}" -u "${DB_USER}" -p"${DB_PASS}" \
        -N -e "SHOW TABLES FROM ${db}" 2>/dev/null \
        | head -10
}
37
 
38
# Wait for the DB server to come up: poll discover_db up to 10 times
# (~30 s total) before giving up.
for i in $(seq 1 10); do
    DB_NAME=$(discover_db) && [ -n "$DB_NAME" ] && break
    sleep 3
done

# Exit 0 (not an error) when no application database exists -- the
# environment simply has nothing for this traffic generator to do.
if [ -z "${DB_NAME:-}" ]; then
    echo "[NPC-DB] No application database found, exiting"
    exit 0
fi

# Get available tables (mapfile keeps names with spaces intact).
mapfile -t TABLES < <(discover_tables "$DB_NAME")
if [ ${#TABLES[@]} -eq 0 ]; then
    echo "[NPC-DB] No tables found in ${DB_NAME}, exiting"
    exit 0
fi

echo "[NPC-DB] Starting DB traffic to ${DB_HOST}/${DB_NAME} at ${RATE_LAMBDA} queries/min (${#TABLES[@]} tables)"
57
 
58
  while true; do
59
+ IDX=$(( RANDOM % ${#TABLES[@]} ))
60
+ TABLE="${TABLES[$IDX]}"
61
+
62
+ # Alternate between safe read queries
63
+ case $(( RANDOM % 3 )) in
64
+ 0) QUERY="SELECT * FROM ${DB_NAME}.${TABLE} LIMIT 5" ;;
65
+ 1) QUERY="SELECT COUNT(*) FROM ${DB_NAME}.${TABLE}" ;;
66
+ 2) QUERY="DESCRIBE ${DB_NAME}.${TABLE}" ;;
67
+ esac
68
 
69
  mysql -h "${DB_HOST}" \
70
  -u "${DB_USER}" \
src/open_range/builder/npc/http_traffic.sh CHANGED
@@ -2,7 +2,8 @@
2
  # Level 0 NPC: HTTP traffic generator (curl loop)
3
  #
4
  # Generates benign web traffic to simulate normal user browsing.
5
- # All requests are labeled as NPC traffic in the access log via User-Agent.
 
6
  #
7
  # Environment variables:
8
  # WEB_HOST - hostname of the web server (default: web)
@@ -13,50 +14,45 @@ set -euo pipefail
13
  WEB_HOST="${WEB_HOST:-web}"
14
  RATE_LAMBDA="${RATE_LAMBDA:-30}"
15
 
16
- # Calculate sleep interval in seconds
17
  INTERVAL=$(awk "BEGIN {printf \"%.1f\", 60.0 / $RATE_LAMBDA}")
18
 
19
- # Common pages and endpoints that a normal user would visit
20
- PAGES=(
21
- "/"
22
- "/index.php"
23
- "/dashboard.php"
24
- "/lookup.php?last=Smith"
25
- "/lookup.php?last=Hart"
26
- "/lookup.php?last=Bishop"
27
- "/referral_submit.php"
28
- "/api/referral_status.php?id=1001"
29
- "/api/referral_status.php?id=1002"
30
- "/admin/compliance_report.php"
31
- "/logout.php"
32
- )
33
-
34
- # Common form submissions
35
- FORMS=(
36
- "-d 'username=mgarcia&password=Welcome2024!' http://${WEB_HOST}/index.php"
37
- "-d 'username=kwilliams&password=Welcome2024!' http://${WEB_HOST}/index.php"
38
- "-d 'patient_id=1&clinic=Northside&specialist=Dr.Patel&diagnosis=Cardiology' http://${WEB_HOST}/referral_submit.php"
39
- )
40
-
41
- echo "[NPC-HTTP] Starting HTTP traffic to ${WEB_HOST} at ${RATE_LAMBDA} req/min"
 
 
 
 
 
 
 
42
 
43
  while true; do
44
- # 80% GET requests, 20% POST requests
45
- if (( RANDOM % 5 == 0 )); then
46
- # POST request (form submission)
47
- IDX=$(( RANDOM % ${#FORMS[@]} ))
48
- FORM="${FORMS[$IDX]}"
49
- curl -s -o /dev/null -w '' \
50
- -A "NPC-Traffic/1.0 (benign)" \
51
- -X POST ${FORM} 2>/dev/null || true
52
- else
53
- # GET request (page browse)
54
- IDX=$(( RANDOM % ${#PAGES[@]} ))
55
- PAGE="${PAGES[$IDX]}"
56
- curl -s -o /dev/null -w '' \
57
- -A "NPC-Traffic/1.0 (benign)" \
58
- "http://${WEB_HOST}${PAGE}" 2>/dev/null || true
59
- fi
60
 
61
  sleep "${INTERVAL}"
62
  done
 
2
  # Level 0 NPC: HTTP traffic generator (curl loop)
3
  #
4
  # Generates benign web traffic to simulate normal user browsing.
5
+ # Discovers available pages dynamically from the web server's document
6
+ # root so it adapts to any LLM-generated environment.
7
  #
8
  # Environment variables:
9
  # WEB_HOST - hostname of the web server (default: web)
 
14
  WEB_HOST="${WEB_HOST:-web}"
15
  RATE_LAMBDA="${RATE_LAMBDA:-30}"
16
 
 
17
  INTERVAL=$(awk "BEGIN {printf \"%.1f\", 60.0 / $RATE_LAMBDA}")
18
 
19
# Discover available pages from the web root.
# Prefers filesystem enumeration (only effective when this script runs on
# a host that has the doc root mounted locally), then falls back to
# HTTP-probing a few common endpoints. "/" is always included so the
# caller never gets an empty list.
discover_pages() {
    local pages=("/")
    # Try common doc roots; only the first existing root is scanned.
    for root in /var/www/html /var/www/portal /var/www; do
        if [ -d "$root" ]; then
            while IFS= read -r f; do
                # Strip doc root to get URL path
                local url_path="${f#$root}"
                [ -n "$url_path" ] && pages+=("$url_path")
            done < <(find "$root" -maxdepth 2 -name '*.php' -o -name '*.html' 2>/dev/null | head -20)
            break
        fi
    done
    # Fallback: probe common endpoints and keep those answering 2xx/3xx.
    if [ ${#pages[@]} -le 1 ]; then
        for p in /index.php /index.html /login.php /dashboard.php; do
            if curl -s -o /dev/null -w '%{http_code}' "http://${WEB_HOST}${p}" 2>/dev/null | grep -q '^[23]'; then
                pages+=("$p")
            fi
        done
    fi
    printf '%s\n' "${pages[@]}"
}
43
+
44
# Build page list once at startup
mapfile -t PAGES < <(discover_pages)
# Defensive: discover_pages always emits "/", but never modulo by zero below.
[ ${#PAGES[@]} -eq 0 ] && PAGES=("/")

echo "[NPC-HTTP] Starting HTTP traffic to ${WEB_HOST} at ${RATE_LAMBDA} req/min (${#PAGES[@]} pages)"

while true; do
    # Pick a uniformly random discovered page each iteration.
    IDX=$(( RANDOM % ${#PAGES[@]} ))
    PAGE="${PAGES[$IDX]}"
    # Tagged User-Agent lets log analysis separate NPC noise from agent
    # traffic; failures are swallowed so the loop never dies mid-episode.
    curl -s -o /dev/null -w '' \
        -A "NPC-Traffic/1.0 (benign)" \
        "http://${WEB_HOST}${PAGE}" 2>/dev/null || true

    sleep "${INTERVAL}"
done
src/open_range/builder/npc/npc_agent.py CHANGED
@@ -228,26 +228,43 @@ class LLMNPCAgent:
228
  self._actions.append(log_entry)
229
  logger.debug("NPC %s: %s", persona.name, log_entry.get("detail", ""))
230
 
231
- # --- Phase 2: Check mailbox ---
 
 
232
  try:
233
  mail_output = await containers.exec(
234
  "mail",
235
- f"find /var/mail/{mail_user} "
 
236
  f"-newer /tmp/.npc_check_{mail_user} "
237
- f"-type f 2>/dev/null | head -1",
238
  )
239
  await containers.exec("mail", f"touch /tmp/.npc_check_{mail_user}")
240
 
241
  if mail_output and mail_output.strip():
242
- email_file = mail_output.strip().split("\n")[0]
243
- content = await containers.exec(
244
- "mail", f"head -50 '{email_file}' 2>/dev/null || true",
245
- )
246
- if content and content.strip():
 
 
 
 
 
 
 
 
 
 
 
 
247
  stimulus = Stimulus(
248
- type="email", sender="unknown",
249
- subject="Incoming message",
 
250
  content=content[:500],
 
251
  )
252
  react = await self.decide(persona, stimulus)
253
  react_log = await executor.execute(persona, react)
 
228
  self._actions.append(log_entry)
229
  logger.debug("NPC %s: %s", persona.name, log_entry.get("detail", ""))
230
 
231
+ # --- Phase 2: Check mailbox for incoming stimuli ---
232
+ # Red may send real phishing emails via SMTP. Check multiple
233
+ # mail spool locations for new messages.
234
  try:
235
  mail_output = await containers.exec(
236
  "mail",
237
+ f"{{ find /var/spool/mail/ /var/mail/ "
238
+ f"/home/{mail_user}/Maildir/new/ "
239
  f"-newer /tmp/.npc_check_{mail_user} "
240
+ f"-type f 2>/dev/null || true; }} | head -3",
241
  )
242
  await containers.exec("mail", f"touch /tmp/.npc_check_{mail_user}")
243
 
244
  if mail_output and mail_output.strip():
245
+ for email_file in mail_output.strip().split("\n")[:3]:
246
+ email_file = email_file.strip()
247
+ if not email_file:
248
+ continue
249
+ content = await containers.exec(
250
+ "mail", f"head -50 '{email_file}' 2>/dev/null || true",
251
+ )
252
+ if not content or not content.strip():
253
+ continue
254
+ # Extract sender from email headers
255
+ sender = "unknown"
256
+ subject = "Incoming message"
257
+ for line in content.split("\n")[:20]:
258
+ if line.lower().startswith("from:"):
259
+ sender = line.split(":", 1)[1].strip()
260
+ elif line.lower().startswith("subject:"):
261
+ subject = line.split(":", 1)[1].strip()
262
  stimulus = Stimulus(
263
+ type="email",
264
+ sender=sender,
265
+ subject=subject,
266
  content=content[:500],
267
+ plausibility=0.7,
268
  )
269
  react = await self.decide(persona, stimulus)
270
  react_log = await executor.execute(persona, react)
src/open_range/builder/npc/npc_manager.py CHANGED
@@ -89,10 +89,11 @@ def _container_for_script(script_name: str, topology: dict[str, Any]) -> str:
89
 
90
 
91
  def _resolve_env_vars(topology: dict[str, Any], rate_lambda: float) -> dict[str, str]:
92
- """Build environment variables by resolving roles from the topology.
93
 
94
- Instead of hardcoding ``WEB_HOST=web``, this finds the host whose
95
- services list contains web/nginx/etc and maps the role to its name.
 
96
  """
97
  hosts = _hosts_from_topology(topology)
98
  env: dict[str, str] = {"RATE_LAMBDA": str(int(rate_lambda))}
@@ -103,6 +104,21 @@ def _resolve_env_vars(topology: dict[str, Any], rate_lambda: float) -> dict[str,
103
  env[role] = host["name"]
104
  break
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  return env
107
 
108
 
@@ -127,10 +143,20 @@ def _derive_scripts_from_topology(topology: dict[str, Any]) -> list[str]:
127
 
128
 
129
  class NPCManager:
130
- """Start and stop NPC background traffic for a snapshot."""
 
 
 
 
 
 
 
 
 
131
 
132
- def __init__(self, mock_mode: bool = False) -> None:
133
  self._mock_mode = mock_mode
 
134
  self._processes: list[asyncio.subprocess.Process] = []
135
  self._tasks: list[asyncio.Task[Any]] = []
136
  self._running = False
@@ -261,9 +287,9 @@ class NPCManager:
261
  from open_range.builder.npc.npc_agent import LLMNPCAgent
262
 
263
  for persona in snapshot.npc_personas:
264
- agent = LLMNPCAgent()
265
  task = asyncio.create_task(
266
- agent.run_loop(persona, containers),
267
  name=f"npc_{persona.name}",
268
  )
269
  self._tasks.append(task)
@@ -354,8 +380,6 @@ class NPCManager:
354
 
355
  self._running = True
356
  self._containers = containers
357
- npc_cfg = snapshot.npc_traffic
358
-
359
  # Re-initialise channels for the new episode
360
  self.channels = {
361
  "chat": ChatChannel(),
 
89
 
90
 
91
  def _resolve_env_vars(topology: dict[str, Any], rate_lambda: float) -> dict[str, str]:
92
+ """Build environment variables by resolving roles and credentials from topology.
93
 
94
+ Resolves host roles (WEB_HOST, DB_HOST, etc.) and credentials (DB_USER,
95
+ DB_PASS, SSH_USER, SSH_PASS) from the topology so shell scripts don't
96
+ need hardcoded values.
97
  """
98
  hosts = _hosts_from_topology(topology)
99
  env: dict[str, str] = {"RATE_LAMBDA": str(int(rate_lambda))}
 
104
  env[role] = host["name"]
105
  break
106
 
107
+ # Pass DB and SSH credentials from topology to shell scripts
108
+ users = topology.get("users", [])
109
+ for user in users:
110
+ if not isinstance(user, dict):
111
+ continue
112
+ hosts_list = user.get("hosts", [])
113
+ if "db" in hosts_list and "DB_USER" not in env:
114
+ env["DB_USER"] = user.get("username", "app_user")
115
+ env["DB_PASS"] = user.get("password", "AppUs3r!2024")
116
+ if any(h in hosts_list for h in ("web", "files", "ldap", "siem")):
117
+ role = user.get("role", "")
118
+ if role in ("admin", "sysadmin", "root") and "SSH_USER" not in env:
119
+ env["SSH_USER"] = user.get("username", "admin")
120
+ env["SSH_PASS"] = user.get("password", "Adm1n!2024")
121
+
122
  return env
123
 
124
 
 
143
 
144
 
145
  class NPCManager:
146
+ """Start and stop NPC background traffic for a snapshot.
147
+
148
+ Args:
149
+ mock_mode: When True, skip Docker exec and LLM calls (unit tests).
150
+ model: LiteLLM model string for Level 1 NPC agents.
151
+ Defaults to ``OPENRANGE_NPC_MODEL`` env var, then
152
+ ``azure/gpt-5.2-codex``. Any LiteLLM-supported model works
153
+ (e.g. ``openai/gpt-4o``, ``anthropic/claude-haiku-4-5-20251001``,
154
+ ``ollama/llama3``).
155
+ """
156
 
157
+ def __init__(self, mock_mode: bool = False, model: str | None = None) -> None:
158
  self._mock_mode = mock_mode
159
+ self._model = model # passed to LLMNPCAgent
160
  self._processes: list[asyncio.subprocess.Process] = []
161
  self._tasks: list[asyncio.Task[Any]] = []
162
  self._running = False
 
287
  from open_range.builder.npc.npc_agent import LLMNPCAgent
288
 
289
  for persona in snapshot.npc_personas:
290
+ agent = LLMNPCAgent(model=self._model)
291
  task = asyncio.create_task(
292
+ agent.run_loop(persona, containers, snapshot),
293
  name=f"npc_{persona.name}",
294
  )
295
  self._tasks.append(task)
 
380
 
381
  self._running = True
382
  self._containers = containers
 
 
383
  # Re-initialise channels for the new episode
384
  self.channels = {
385
  "chat": ChatChannel(),
src/open_range/builder/npc/persona.py CHANGED
@@ -12,11 +12,15 @@ from open_range.protocols import NPCPersona
12
  __all__ = ["NPCPersona", "default_personas"]
13
 
14
 
15
- def default_personas() -> list[NPCPersona]:
16
  """Return a default set of NPC personas for testing.
17
 
18
  Two personas with contrasting security awareness levels:
19
  a low-awareness marketing employee and a high-awareness CISO.
 
 
 
 
20
  """
21
  return [
22
  NPCPersona(
@@ -41,7 +45,7 @@ def default_personas() -> list[NPCPersona]:
41
  ],
42
  },
43
  accounts={
44
- "email": "jsmith@acmecorp.local",
45
  "ldap": "jsmith",
46
  "smb_shares": "marketing,shared",
47
  },
@@ -70,7 +74,7 @@ def default_personas() -> list[NPCPersona]:
70
  ],
71
  },
72
  accounts={
73
- "email": "dchen@acmecorp.local",
74
  "ldap": "dchen",
75
  "smb_shares": "security,executive",
76
  },
 
12
  __all__ = ["NPCPersona", "default_personas"]
13
 
14
 
15
+ def default_personas(domain: str = "corp.local") -> list[NPCPersona]:
16
  """Return a default set of NPC personas for testing.
17
 
18
  Two personas with contrasting security awareness levels:
19
  a low-awareness marketing employee and a high-awareness CISO.
20
+
21
+ Args:
22
+ domain: Email domain to use. Derived from snapshot topology at
23
+ runtime so personas match the generated environment.
24
  """
25
  return [
26
  NPCPersona(
 
45
  ],
46
  },
47
  accounts={
48
+ "email": f"jsmith@{domain}",
49
  "ldap": "jsmith",
50
  "smb_shares": "marketing,shared",
51
  },
 
74
  ],
75
  },
76
  accounts={
77
+ "email": f"dchen@{domain}",
78
  "ldap": "dchen",
79
  "smb_shares": "security,executive",
80
  },
src/open_range/builder/npc/ssh_traffic.sh CHANGED
@@ -31,9 +31,9 @@ COMMANDS=(
31
  "w"
32
  )
33
 
34
- # Credentials for benign SSH sessions
35
- SSH_USER="admin"
36
- SSH_PASS="Adm1n!2024"
37
 
38
  HOSTS=("${WEB_HOST}" "${DB_HOST}")
39
 
 
31
  "w"
32
  )
33
 
34
# Credentials for benign SSH sessions -- resolved from env or defaults.
# NOTE(review): SSH_USER/SSH_PASS appear to be injected from the topology
# by the NPC manager; these literals are only the fallback.
SSH_USER="${SSH_USER:-admin}"
SSH_PASS="${SSH_PASS:-Adm1n!2024}"
37
 
38
  HOSTS=("${WEB_HOST}" "${DB_HOST}")
39
 
src/open_range/builder/renderer.py CHANGED
@@ -15,6 +15,7 @@ from typing import Any
15
 
16
  import jinja2
17
 
 
18
  from open_range.protocols import SnapshotSpec
19
 
20
  logger = logging.getLogger(__name__)
@@ -81,13 +82,42 @@ class SnapshotRenderer:
81
  encoding="utf-8",
82
  )
83
  logger.info("Rendered %d payload artifact(s) -> %s", len(payload_manifest), manifest_path)
 
 
 
 
84
  logger.info(
85
- "SnapshotRenderer: rendering complete (%d templates, %d payloads)",
86
  len(_TEMPLATE_MAP),
87
  len(payload_manifest),
 
88
  )
89
  return output_dir
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def _render_payloads(self, spec: SnapshotSpec, output_dir: Path) -> dict[str, str]:
92
  payload_manifest: dict[str, str] = {}
93
  for key, content in spec.files.items():
@@ -176,6 +206,9 @@ def _build_context(spec: SnapshotSpec) -> dict[str, Any]:
176
  has_download,
177
  )
178
 
 
 
 
179
  context: dict[str, Any] = {
180
  # docker-compose.yml.j2
181
  "snapshot_id": topology.get("snapshot_id", "generated"),
@@ -183,12 +216,15 @@ def _build_context(spec: SnapshotSpec) -> dict[str, Any]:
183
  "hosts": hosts,
184
  "host_names": host_names,
185
  "db_host": "db",
186
- "db_user": _find_db_user(users),
187
- "db_pass": _find_db_pass(users),
 
 
188
  "mysql_root_password": topology.get("mysql_root_password", _find_mysql_root_pass(users)),
189
- "domain": topology.get("domain", "acmecorp.local"),
190
- "org_name": topology.get("org_name", "AcmeCorp"),
191
- "ldap_admin_pass": "LdapAdm1n!",
 
192
  # Dockerfile.web.j2
193
  "users": users,
194
  "app_files": app_files,
@@ -304,3 +340,17 @@ def _find_mysql_root_pass(users: list[dict[str, Any]]) -> str:
304
  if u.get("username") == "admin" and "db" in u.get("hosts", []):
305
  return u.get("password", "r00tP@ss!")
306
  return "r00tP@ss!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  import jinja2
17
 
18
+ from open_range.builder.service_manifest import generate_service_specs
19
  from open_range.protocols import SnapshotSpec
20
 
21
  logger = logging.getLogger(__name__)
 
82
  encoding="utf-8",
83
  )
84
  logger.info("Rendered %d payload artifact(s) -> %s", len(payload_manifest), manifest_path)
85
+
86
+ # Generate ServiceSpec entries from compose + topology
87
+ self._build_service_specs(spec)
88
+
89
  logger.info(
90
+ "SnapshotRenderer: rendering complete (%d templates, %d payloads, %d services)",
91
  len(_TEMPLATE_MAP),
92
  len(payload_manifest),
93
+ len(spec.services),
94
  )
95
  return output_dir
96
 
97
+ def _build_service_specs(self, spec: SnapshotSpec) -> None:
98
+ """Populate ``spec.services`` from compose and topology.
99
+
100
+ Delegates to :func:`generate_service_specs` which maps Docker
101
+ image names (or topology host names) to subprocess-mode daemon
102
+ lifecycle declarations. Only runs if the spec does not already
103
+ have services declared (idempotent).
104
+ """
105
+ if spec.services:
106
+ logger.debug("ServiceSpec entries already present — skipping generation")
107
+ return
108
+
109
+ svc_specs = generate_service_specs(
110
+ compose=spec.compose,
111
+ topology=spec.topology,
112
+ )
113
+ spec.services = svc_specs
114
+ if svc_specs:
115
+ logger.info(
116
+ "Generated %d ServiceSpec entries: %s",
117
+ len(svc_specs),
118
+ [s.daemon for s in svc_specs],
119
+ )
120
+
121
  def _render_payloads(self, spec: SnapshotSpec, output_dir: Path) -> dict[str, str]:
122
  payload_manifest: dict[str, str] = {}
123
  for key, content in spec.files.items():
 
206
  has_download,
207
  )
208
 
209
+ db_user = _find_db_user(users)
210
+ db_pass = _find_db_pass(users)
211
+
212
  context: dict[str, Any] = {
213
  # docker-compose.yml.j2
214
  "snapshot_id": topology.get("snapshot_id", "generated"),
 
216
  "hosts": hosts,
217
  "host_names": host_names,
218
  "db_host": "db",
219
+ "db_user": db_user,
220
+ "db_pass": db_pass,
221
+ "db_name": topology.get("db_name", "app_db"),
222
+ "db_password": db_pass,
223
  "mysql_root_password": topology.get("mysql_root_password", _find_mysql_root_pass(users)),
224
+ "domain": topology.get("domain", "corp.local"),
225
+ "org_name": topology.get("org_name", "Corp"),
226
+ "ldap_admin_pass": topology.get("ldap_admin_pass", "LdapAdm1n!"),
227
+ "smb_shares": _find_smb_shares(spec),
228
  # Dockerfile.web.j2
229
  "users": users,
230
  "app_files": app_files,
 
340
  if u.get("username") == "admin" and "db" in u.get("hosts", []):
341
  return u.get("password", "r00tP@ss!")
342
  return "r00tP@ss!"
343
+
344
+
345
+ def _find_smb_shares(spec: SnapshotSpec) -> list[str]:
346
+ """Extract Samba share names from snapshot files dict."""
347
+ shares: set[str] = set()
348
+ for key in spec.files:
349
+ if not key.startswith("files:"):
350
+ continue
351
+ path = key.split(":", 1)[1]
352
+ if "/srv/shares/" in path:
353
+ parts = path.split("/srv/shares/")[1].split("/")
354
+ if parts:
355
+ shares.add(parts[0])
356
+ return sorted(shares) or ["general"]
src/open_range/builder/service_manifest.py ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Generate ServiceSpec entries from Docker Compose and topology definitions.
2
+
3
+ Translates Docker Compose service definitions into subprocess-mode daemon
4
+ lifecycle declarations. The primary consumer is ``SnapshotRenderer`` which
5
+ stores the generated list in ``SnapshotSpec.services`` so that
6
+ ``RangeEnvironment._start_snapshot_services()`` can start the correct daemons
7
+ at episode reset time without relying on a hardcoded host-to-service map.
8
+
9
+ The ``_IMAGE_SERVICE_HINTS`` mapping is intentionally a *hint* table, not a
10
+ hard requirement. Unknown images are skipped with a warning rather than
11
+ raising an error — this keeps the system forward-compatible with new services
12
+ that haven't been catalogued yet.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ from typing import Any
19
+
20
+ from open_range.protocols import ReadinessCheck, ServiceSpec
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Image hint table
26
+ # ---------------------------------------------------------------------------
27
+ # Maps Docker image name prefixes to a tuple of:
28
+ # (daemon_name, packages, init_commands, start_command, readiness)
29
+ #
30
+ # Values are *templates* — callers may override port, log_dir, env_vars.
31
+ # The start_command may contain ``{log_dir}`` which is interpolated at
32
+ # generation time.
33
+
34
+ _ImageHint = tuple[
35
+ str, # daemon
36
+ list[str], # packages
37
+ list[str], # init_commands
38
+ str, # start_command
39
+ ReadinessCheck, # readiness
40
+ ]
41
+
42
+ _IMAGE_SERVICE_HINTS: dict[str, _ImageHint] = {
43
+ # ── Web ──────────────────────────────────────────────────────────
44
+ "nginx": (
45
+ "nginx",
46
+ ["nginx"],
47
+ ["mkdir -p /var/log/nginx"],
48
+ "nginx -g 'daemon off;' > {log_dir}/nginx.log 2>&1 &",
49
+ ReadinessCheck(type="tcp", port=80, timeout_s=10),
50
+ ),
51
+
52
+ # ── Databases ────────────────────────────────────────────────────
53
+ "mysql": (
54
+ "mysqld",
55
+ ["default-mysql-server", "default-mysql-client"],
56
+ [
57
+ "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
58
+ "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
59
+ ],
60
+ "mysqld --user=mysql --log-error={log_dir}/mysql.log &",
61
+ ReadinessCheck(type="command", command="mysqladmin ping --silent 2>/dev/null || mariadb-admin ping --silent 2>/dev/null", timeout_s=30),
62
+ ),
63
+ "mariadb": (
64
+ "mariadbd",
65
+ ["default-mysql-server", "default-mysql-client"],
66
+ [
67
+ "mkdir -p /var/run/mysqld && chown mysql:mysql /var/run/mysqld 2>/dev/null || true",
68
+ "mkdir -p /var/log/mysql && chown mysql:mysql /var/log/mysql 2>/dev/null || true",
69
+ ],
70
+ "mariadbd --user=mysql --log-error={log_dir}/mysql.log &",
71
+ ReadinessCheck(type="command", command="mariadb-admin ping --silent 2>/dev/null || mysqladmin ping --silent 2>/dev/null", timeout_s=30),
72
+ ),
73
+ "postgres": (
74
+ "postgres",
75
+ ["postgresql"],
76
+ [
77
+ "mkdir -p /var/run/postgresql && chown postgres:postgres /var/run/postgresql 2>/dev/null || true",
78
+ ],
79
+ "su - postgres -c 'pg_ctl start -D /var/lib/postgresql/data -l {log_dir}/postgres.log' &",
80
+ ReadinessCheck(type="tcp", port=5432, timeout_s=30),
81
+ ),
82
+
83
+ # ── Directory ────────────────────────────────────────────────────
84
+ "openldap": (
85
+ "slapd",
86
+ ["slapd", "ldap-utils"],
87
+ ["mkdir -p /var/run/slapd"],
88
+ "slapd -h 'ldap:/// ldapi:///' -u openldap -g openldap > {log_dir}/slapd.log 2>&1 &",
89
+ ReadinessCheck(type="command", command="ldapsearch -x -H ldap://localhost -b '' -s base namingContexts >/dev/null 2>&1", timeout_s=10),
90
+ ),
91
+ "osixia/openldap": (
92
+ "slapd",
93
+ ["slapd", "ldap-utils"],
94
+ ["mkdir -p /var/run/slapd"],
95
+ "slapd -h 'ldap:/// ldapi:///' -u openldap -g openldap > {log_dir}/slapd.log 2>&1 &",
96
+ ReadinessCheck(type="command", command="ldapsearch -x -H ldap://localhost -b '' -s base namingContexts >/dev/null 2>&1", timeout_s=10),
97
+ ),
98
+
99
+ # ── Logging ──────────────────────────────────────────────────────
100
+ "rsyslog": (
101
+ "rsyslogd",
102
+ ["rsyslog"],
103
+ [],
104
+ "rsyslogd -n > {log_dir}/rsyslog.log 2>&1 &",
105
+ ReadinessCheck(type="command", command="pgrep -x rsyslogd", timeout_s=5),
106
+ ),
107
+
108
+ # ── File sharing ─────────────────────────────────────────────────
109
+ "samba": (
110
+ "smbd",
111
+ ["samba"],
112
+ ["mkdir -p /var/lib/samba/private"],
113
+ "smbd --foreground --no-process-group > {log_dir}/smbd.log 2>&1 &",
114
+ ReadinessCheck(type="tcp", port=445, timeout_s=10),
115
+ ),
116
+
117
+ # ── Mail ─────────────────────────────────────────────────────────
118
+ "postfix": (
119
+ "master",
120
+ ["postfix"],
121
+ [],
122
+ "postfix start > {log_dir}/postfix.log 2>&1 || true",
123
+ ReadinessCheck(type="tcp", port=25, timeout_s=10),
124
+ ),
125
+
126
+ # ── Cache ────────────────────────────────────────────────────────
127
+ "redis": (
128
+ "redis-server",
129
+ ["redis-server"],
130
+ [],
131
+ "redis-server --daemonize yes --logfile {log_dir}/redis.log",
132
+ ReadinessCheck(type="tcp", port=6379, timeout_s=10),
133
+ ),
134
+
135
+ # ── CI/CD ────────────────────────────────────────────────────────
136
+ "jenkins": (
137
+ "java",
138
+ ["default-jdk"],
139
+ [],
140
+ "java -jar /usr/share/jenkins/jenkins.war --httpPort=8080 > {log_dir}/jenkins.log 2>&1 &",
141
+ ReadinessCheck(type="http", url="http://localhost:8080/login", timeout_s=60),
142
+ ),
143
+
144
+ # ── Monitoring ───────────────────────────────────────────────────
145
+ "prometheus": (
146
+ "prometheus",
147
+ ["prometheus"],
148
+ [],
149
+ "prometheus --config.file=/etc/prometheus/prometheus.yml --web.listen-address=:9090 > {log_dir}/prometheus.log 2>&1 &",
150
+ ReadinessCheck(type="http", url="http://localhost:9090/-/ready", timeout_s=15),
151
+ ),
152
+ "grafana": (
153
+ "grafana-server",
154
+ ["grafana"],
155
+ [],
156
+ "grafana-server --homepath=/usr/share/grafana > {log_dir}/grafana.log 2>&1 &",
157
+ ReadinessCheck(type="http", url="http://localhost:3000/api/health", timeout_s=15),
158
+ ),
159
+
160
+ # ── Remote access ────────────────────────────────────────────────
161
+ "openssh": (
162
+ "sshd",
163
+ ["openssh-server"],
164
+ ["mkdir -p /var/run/sshd"],
165
+ "/usr/sbin/sshd -E {log_dir}/sshd.log",
166
+ ReadinessCheck(type="tcp", port=22, timeout_s=5),
167
+ ),
168
+ "linuxserver/openssh-server": (
169
+ "sshd",
170
+ ["openssh-server"],
171
+ ["mkdir -p /var/run/sshd"],
172
+ "/usr/sbin/sshd -E {log_dir}/sshd.log",
173
+ ReadinessCheck(type="tcp", port=22, timeout_s=5),
174
+ ),
175
+ }
176
+
177
# ---------------------------------------------------------------------------
# Topology host-name hints (fallback when compose services are absent)
# ---------------------------------------------------------------------------
# Maps logical host names commonly used in manifests to keys into the
# image-hint table above (e.g. a host named "web" gets the "nginx" hint).
# "firewall" deliberately reuses "rsyslog" so firewall hosts at least run
# a log daemon; there is no dedicated firewall daemon hint.

_HOST_NAME_HINTS: dict[str, str] = {
    "web": "nginx",
    "db": "mysql",
    "ldap": "openldap",
    "siem": "rsyslog",
    "files": "samba",
    "mail": "postfix",
    "firewall": "rsyslog",
    "cache": "redis",
    "redis": "redis",
    "ci_cd": "jenkins",
    "ci": "jenkins",
    "monitoring": "prometheus",
    "ssh": "openssh",
}
197
+
198
+ # Default log directory used when none is specified.
199
+ _DEFAULT_LOG_DIR = "/var/log/siem"
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # Public API
204
+ # ---------------------------------------------------------------------------
205
+
206
+
207
+ def generate_service_specs(
208
+ compose: dict[str, Any],
209
+ topology: dict[str, Any],
210
+ ) -> list[ServiceSpec]:
211
+ """Generate ServiceSpec entries from compose and topology.
212
+
213
+ Translates Docker Compose service definitions into subprocess-mode
214
+ daemon lifecycle declarations.
215
+
216
+ The function examines ``compose["services"]`` first. For each service
217
+ whose image matches a known hint, a ``ServiceSpec`` is produced. If
218
+ the compose dict is empty or missing, the function falls back to the
219
+ topology host list using ``_HOST_NAME_HINTS``.
220
+
221
+ Services that cannot be mapped (e.g. custom images with no hint) are
222
+ skipped with a debug-level log message.
223
+
224
+ Parameters
225
+ ----------
226
+ compose:
227
+ Parsed docker-compose dict (may be empty).
228
+ topology:
229
+ Parsed topology dict from the manifest / snapshot.
230
+
231
+ Returns
232
+ -------
233
+ list[ServiceSpec]
234
+ One entry per recognised service. Order follows the compose
235
+ services dict (or the topology hosts list as fallback).
236
+ """
237
+ specs: list[ServiceSpec] = []
238
+ seen_daemons: set[str] = set()
239
+
240
+ services = compose.get("services", {}) if compose else {}
241
+
242
+ if services:
243
+ specs = _from_compose(services, seen_daemons)
244
+ else:
245
+ specs = _from_topology(topology, seen_daemons)
246
+
247
+ return specs
248
+
249
+
250
+ # ---------------------------------------------------------------------------
251
+ # Internal helpers
252
+ # ---------------------------------------------------------------------------
253
+
254
+
255
+ def _match_image_hint(image: str) -> _ImageHint | None:
256
+ """Match a Docker image string to the closest hint entry.
257
+
258
+ Strips tags (``mysql:8.0`` -> ``mysql``), handles namespaced images
259
+ (``osixia/openldap:1.5`` -> ``osixia/openldap``), and falls back to
260
+ substring matching on the image basename.
261
+ """
262
+ if not image:
263
+ return None
264
+
265
+ # Remove tag
266
+ base = image.split(":")[0].strip()
267
+
268
+ # Exact match (with or without namespace)
269
+ if base in _IMAGE_SERVICE_HINTS:
270
+ return _IMAGE_SERVICE_HINTS[base]
271
+
272
+ # Try basename only (e.g. ``bitnami/redis`` -> ``redis``)
273
+ basename = base.rsplit("/", 1)[-1]
274
+ if basename in _IMAGE_SERVICE_HINTS:
275
+ return _IMAGE_SERVICE_HINTS[basename]
276
+
277
+ # Substring match as last resort (e.g. ``mysql/mysql-server`` -> ``mysql``)
278
+ for key, hint in _IMAGE_SERVICE_HINTS.items():
279
+ if "/" not in key and key in basename:
280
+ return hint
281
+
282
+ return None
283
+
284
+
285
+ def _env_from_compose_service(svc_def: dict[str, Any]) -> dict[str, str]:
286
+ """Extract environment variables from a compose service definition.
287
+
288
+ Handles both the ``list`` form (``- KEY=VALUE``) and the ``dict`` form.
289
+ """
290
+ raw = svc_def.get("environment", {})
291
+ if isinstance(raw, list):
292
+ env: dict[str, str] = {}
293
+ for entry in raw:
294
+ if "=" in entry:
295
+ k, v = entry.split("=", 1)
296
+ env[k] = v
297
+ return env
298
+ if isinstance(raw, dict):
299
+ return {str(k): str(v) for k, v in raw.items()}
300
+ return {}
301
+
302
+
303
+ def _build_service_spec(
304
+ host: str,
305
+ hint: _ImageHint,
306
+ log_dir: str = _DEFAULT_LOG_DIR,
307
+ env_vars: dict[str, str] | None = None,
308
+ ) -> ServiceSpec:
309
+ """Build a ServiceSpec from a matched hint tuple."""
310
+ daemon, packages, init_commands, start_command, readiness = hint
311
+ return ServiceSpec(
312
+ host=host,
313
+ daemon=daemon,
314
+ packages=list(packages),
315
+ init_commands=list(init_commands),
316
+ start_command=start_command.format(log_dir=log_dir),
317
+ readiness=readiness.model_copy(),
318
+ log_dir=log_dir,
319
+ env_vars=env_vars or {},
320
+ )
321
+
322
+
323
+ def _from_compose(
324
+ services: dict[str, Any],
325
+ seen_daemons: set[str],
326
+ ) -> list[ServiceSpec]:
327
+ """Generate specs from the compose services section."""
328
+ specs: list[ServiceSpec] = []
329
+
330
+ for svc_name, svc_def in services.items():
331
+ if not isinstance(svc_def, dict):
332
+ continue
333
+
334
+ image = svc_def.get("image", "")
335
+ hint = _match_image_hint(image)
336
+
337
+ # If no image, try matching the service name itself
338
+ if hint is None and svc_name in _HOST_NAME_HINTS:
339
+ fallback_key = _HOST_NAME_HINTS[svc_name]
340
+ hint = _IMAGE_SERVICE_HINTS.get(fallback_key)
341
+
342
+ if hint is None:
343
+ logger.debug(
344
+ "No service hint for compose service %r (image=%r) — skipping",
345
+ svc_name,
346
+ image,
347
+ )
348
+ continue
349
+
350
+ daemon = hint[0]
351
+ if daemon in seen_daemons:
352
+ continue
353
+ seen_daemons.add(daemon)
354
+
355
+ env_vars = _env_from_compose_service(svc_def)
356
+ spec = _build_service_spec(
357
+ host=svc_name,
358
+ hint=hint,
359
+ env_vars=env_vars,
360
+ )
361
+ specs.append(spec)
362
+
363
+ return specs
364
+
365
+
366
+ def _from_topology(
367
+ topology: dict[str, Any],
368
+ seen_daemons: set[str],
369
+ ) -> list[ServiceSpec]:
370
+ """Generate specs from the topology hosts list (fallback path)."""
371
+ specs: list[ServiceSpec] = []
372
+ hosts = topology.get("hosts", [])
373
+
374
+ for host_entry in hosts:
375
+ host_name = host_entry if isinstance(host_entry, str) else host_entry.get("name", "")
376
+ if not host_name:
377
+ continue
378
+
379
+ hint_key = _HOST_NAME_HINTS.get(host_name)
380
+ if hint_key is None:
381
+ continue
382
+
383
+ hint = _IMAGE_SERVICE_HINTS.get(hint_key)
384
+ if hint is None:
385
+ continue
386
+
387
+ daemon = hint[0]
388
+ if daemon in seen_daemons:
389
+ continue
390
+ seen_daemons.add(daemon)
391
+
392
+ spec = _build_service_spec(host=host_name, hint=hint)
393
+ specs.append(spec)
394
+
395
+ return specs
src/open_range/builder/templates/docker-compose.yml.j2 CHANGED
@@ -110,7 +110,7 @@ services:
110
  mail:
111
  image: namshi/smtp:latest
112
  environment:
113
- - MAILNAME={{ domain | default('meridianhealth.local') }}
114
  volumes:
115
  - shared_logs:/var/log/mail
116
  networks:
@@ -125,9 +125,9 @@ services:
125
  command: --default-authentication-plugin=mysql_native_password
126
  environment:
127
  - MYSQL_ROOT_PASSWORD={{ mysql_root_password | default('r00tP@ss!') }}
128
- - MYSQL_DATABASE=referral_db
129
- - MYSQL_USER=app_user
130
- - MYSQL_PASSWORD=AppUs3r!2024
131
  volumes:
132
  - db_data:/var/lib/mysql
133
  - shared_logs:/var/log/mysql
@@ -144,11 +144,10 @@ services:
144
  files:
145
  image: dperson/samba:latest
146
  environment:
147
- - USER=smbuser;smbP@ss!
148
- - SHARE=general;/srv/shares/general;yes;no;no;smbuser
149
- - SHARE2=hr;/srv/shares/hr;yes;no;no;smbuser
150
- - SHARE3=compliance;/srv/shares/compliance;yes;no;no;smbuser
151
- - SHARE4=contracts;/srv/shares/contracts;yes;no;no;smbuser
152
  volumes:
153
  - shared_logs:/var/log/samba
154
  networks:
@@ -159,8 +158,8 @@ services:
159
  ldap:
160
  image: osixia/openldap:latest
161
  environment:
162
- - LDAP_ORGANISATION={{ org_name | default('MeridianHealth') }}
163
- - LDAP_DOMAIN={{ domain | default('meridianhealth.local') }}
164
  - LDAP_ADMIN_PASSWORD={{ ldap_admin_pass | default('LdapAdm1n!') }}
165
  volumes:
166
  - shared_logs:/var/log/ldap
 
110
  mail:
111
  image: namshi/smtp:latest
112
  environment:
113
+ - MAILNAME={{ domain | default('corp.local') }}
114
  volumes:
115
  - shared_logs:/var/log/mail
116
  networks:
 
125
  command: --default-authentication-plugin=mysql_native_password
126
  environment:
127
  - MYSQL_ROOT_PASSWORD={{ mysql_root_password | default('r00tP@ss!') }}
128
+ - MYSQL_DATABASE={{ db_name | default('app_db') }}
129
+ - MYSQL_USER={{ db_user | default('app_user') }}
130
+ - MYSQL_PASSWORD={{ db_password | default('AppUs3r!2024') }}
131
  volumes:
132
  - db_data:/var/lib/mysql
133
  - shared_logs:/var/log/mysql
 
144
  files:
145
  image: dperson/samba:latest
146
  environment:
147
+ - USER={{ smb_user | default('smbuser') }};{{ smb_password | default('smbP@ss!') }}
148
+ {%- for share in smb_shares | default(['general', 'hr', 'compliance', 'contracts']) %}
149
+ - SHARE{{ loop.index if loop.index > 1 else '' }}={{ share }};/srv/shares/{{ share }};yes;no;no;{{ smb_user | default('smbuser') }}
150
+ {%- endfor %}
 
151
  volumes:
152
  - shared_logs:/var/log/samba
153
  networks:
 
158
  ldap:
159
  image: osixia/openldap:latest
160
  environment:
161
+ - LDAP_ORGANISATION={{ org_name | default('Corp') }}
162
+ - LDAP_DOMAIN={{ domain | default('corp.local') }}
163
  - LDAP_ADMIN_PASSWORD={{ ldap_admin_pass | default('LdapAdm1n!') }}
164
  volumes:
165
  - shared_logs:/var/log/ldap
src/open_range/cli.py CHANGED
@@ -220,6 +220,10 @@ def build(
220
  @click.option("--teacher-model", default=None, help="LiteLLM teacher model. If omitted, selected roles use scripted agents.")
221
  @click.option("--red-model", default=None, help="Override model for Red teacher.")
222
  @click.option("--blue-model", default=None, help="Override model for Blue teacher.")
 
 
 
 
223
  @click.option("--temperature", default=0.2, type=float, help="Teacher sampling temperature.")
224
  @click.option("--max-tokens", default=512, type=int, help="Maximum completion tokens per teacher action.")
225
  @click.option("--template-only/--llm-builder", default=True, help="When using --manifest, build snapshots deterministically instead of via LLM.")
@@ -238,6 +242,10 @@ def synthetic_data(
238
  teacher_model: str | None,
239
  red_model: str | None,
240
  blue_model: str | None,
 
 
 
 
241
  temperature: float,
242
  max_tokens: int,
243
  template_only: bool,
@@ -249,6 +257,13 @@ def synthetic_data(
249
  SyntheticTraceGenerator,
250
  build_teacher_agents,
251
  )
 
 
 
 
 
 
 
252
 
253
  if bool(manifest) == bool(snapshot):
254
  click.echo("Error: provide exactly one of --manifest or --snapshot.", err=True)
@@ -259,11 +274,25 @@ def synthetic_data(
259
  teacher_model
260
  or os.environ.get("OPENRANGE_SYNTH_MODEL")
261
  )
 
 
262
  red_agent, blue_agent = build_teacher_agents(
263
  teacher_model=resolved_teacher_model,
264
  roles=selected_roles,
265
  red_model=red_model,
266
  blue_model=blue_model,
 
 
 
 
 
 
 
 
 
 
 
 
267
  temperature=temperature,
268
  max_tokens=max_tokens,
269
  )
@@ -274,6 +303,7 @@ def synthetic_data(
274
  snapshot=_load_snapshot(snapshot),
275
  red_agent=red_agent,
276
  blue_agent=blue_agent,
 
277
  tier=tier,
278
  max_steps=max_steps,
279
  randomize_flags=randomize_flags,
@@ -284,6 +314,7 @@ def synthetic_data(
284
  _load_manifest(str(manifest)),
285
  red_agent=red_agent,
286
  blue_agent=blue_agent,
 
287
  template_only=template_only,
288
  builder_model=builder_model,
289
  tier=tier,
@@ -307,18 +338,38 @@ def synthetic_data(
307
  + (", ".join(teacher_roles) if teacher_roles else "none (scripted fallbacks)")
308
  )
309
  try:
310
- logger, count = generator.export_jsonl(
311
- output,
312
  num_traces=num_traces,
313
  seed=seed,
 
 
314
  reward_threshold=reward_threshold,
315
  roles=selected_roles,
316
  )
 
 
 
 
 
 
 
 
 
 
317
  except Exception as exc:
318
  click.echo(f"Error: synthetic data generation failed: {exc}", err=True)
319
  sys.exit(1)
320
 
321
  click.echo(f"Wrote {count} JSONL records to {output}")
 
 
 
 
 
 
 
 
 
322
  click.echo(f" Episodes: {len(logger.episodes)}")
323
  click.echo(f" Randomized flags: {'yes' if randomize_flags else 'no'}")
324
 
@@ -533,6 +584,123 @@ def deploy(snapshot: str, compose_dir: str | None) -> None:
533
  pass # Non-critical
534
 
535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  # ---------------------------------------------------------------------------
537
  # server
538
  # ---------------------------------------------------------------------------
 
220
  @click.option("--teacher-model", default=None, help="LiteLLM teacher model. If omitted, selected roles use scripted agents.")
221
  @click.option("--red-model", default=None, help="Override model for Red teacher.")
222
  @click.option("--blue-model", default=None, help="Override model for Blue teacher.")
223
+ @click.option("--bootstrap-traces", multiple=True, type=click.Path(exists=True), help="Existing SFT JSONL files to merge into the output.")
224
+ @click.option("--bootstrap-examples", default=0, type=click.IntRange(0), help="How many bootstrap traces to inject as few-shot examples per generated role.")
225
+ @click.option("--merge-bootstrap/--generated-only", default=True, help="Merge bootstrap traces into the output file, or emit only newly generated records.")
226
+ @click.option("--tool-info", multiple=True, type=click.Path(exists=True), help="Text, JSON, or YAML tool catalog file to append to generated system prompts.")
227
  @click.option("--temperature", default=0.2, type=float, help="Teacher sampling temperature.")
228
  @click.option("--max-tokens", default=512, type=int, help="Maximum completion tokens per teacher action.")
229
  @click.option("--template-only/--llm-builder", default=True, help="When using --manifest, build snapshots deterministically instead of via LLM.")
 
242
  teacher_model: str | None,
243
  red_model: str | None,
244
  blue_model: str | None,
245
+ bootstrap_traces: tuple[str, ...],
246
+ bootstrap_examples: int,
247
+ merge_bootstrap: bool,
248
+ tool_info: tuple[str, ...],
249
  temperature: float,
250
  max_tokens: int,
251
  template_only: bool,
 
257
  SyntheticTraceGenerator,
258
  build_teacher_agents,
259
  )
260
+ from open_range.training.dataset import (
261
+ append_tool_context,
262
+ extract_bootstrap_messages,
263
+ load_jsonl_records,
264
+ load_tool_context,
265
+ write_jsonl_records,
266
+ )
267
 
268
  if bool(manifest) == bool(snapshot):
269
  click.echo("Error: provide exactly one of --manifest or --snapshot.", err=True)
 
274
  teacher_model
275
  or os.environ.get("OPENRANGE_SYNTH_MODEL")
276
  )
277
+ bootstrap_records = load_jsonl_records(bootstrap_traces) if bootstrap_traces else []
278
+ tool_context = load_tool_context(tool_info) if tool_info else ""
279
  red_agent, blue_agent = build_teacher_agents(
280
  teacher_model=resolved_teacher_model,
281
  roles=selected_roles,
282
  red_model=red_model,
283
  blue_model=blue_model,
284
+ red_bootstrap_messages=extract_bootstrap_messages(
285
+ bootstrap_records,
286
+ role="red",
287
+ limit=bootstrap_examples,
288
+ ),
289
+ blue_bootstrap_messages=extract_bootstrap_messages(
290
+ bootstrap_records,
291
+ role="blue",
292
+ limit=bootstrap_examples,
293
+ ),
294
+ red_system_suffix=tool_context,
295
+ blue_system_suffix=tool_context,
296
  temperature=temperature,
297
  max_tokens=max_tokens,
298
  )
 
303
  snapshot=_load_snapshot(snapshot),
304
  red_agent=red_agent,
305
  blue_agent=blue_agent,
306
+ active_roles=selected_roles,
307
  tier=tier,
308
  max_steps=max_steps,
309
  randomize_flags=randomize_flags,
 
314
  _load_manifest(str(manifest)),
315
  red_agent=red_agent,
316
  blue_agent=blue_agent,
317
+ active_roles=selected_roles,
318
  template_only=template_only,
319
  builder_model=builder_model,
320
  tier=tier,
 
338
  + (", ".join(teacher_roles) if teacher_roles else "none (scripted fallbacks)")
339
  )
340
  try:
341
+ logger = generator.generate(
 
342
  num_traces=num_traces,
343
  seed=seed,
344
+ )
345
+ generated_records = logger.to_records(
346
  reward_threshold=reward_threshold,
347
  roles=selected_roles,
348
  )
349
+ if tool_context:
350
+ generated_records = append_tool_context(
351
+ generated_records,
352
+ tool_context,
353
+ )
354
+
355
+ records_to_write = [*bootstrap_records, *generated_records] if merge_bootstrap else generated_records
356
+ count = write_jsonl_records(output, records_to_write)
357
+ generated_count = len(generated_records)
358
+ bootstrap_count = len(bootstrap_records)
359
  except Exception as exc:
360
  click.echo(f"Error: synthetic data generation failed: {exc}", err=True)
361
  sys.exit(1)
362
 
363
  click.echo(f"Wrote {count} JSONL records to {output}")
364
+ click.echo(f" Generated records: {generated_count}")
365
+ if bootstrap_traces and merge_bootstrap:
366
+ click.echo(f" Bootstrap records: {bootstrap_count}")
367
+ elif bootstrap_traces:
368
+ click.echo(f" Bootstrap records loaded for prompting only: {bootstrap_count}")
369
+ if bootstrap_examples:
370
+ click.echo(f" Few-shot bootstrap examples per role: {bootstrap_examples}")
371
+ if tool_info:
372
+ click.echo(f" Tool catalogs applied: {len(tool_info)}")
373
  click.echo(f" Episodes: {len(logger.episodes)}")
374
  click.echo(f" Randomized flags: {'yes' if randomize_flags else 'no'}")
375
 
 
584
  pass # Non-critical
585
 
586
 
587
+ # ---------------------------------------------------------------------------
588
+ # episode
589
+ # ---------------------------------------------------------------------------
590
+
591
+
592
+ @cli.command()
593
+ @click.option("-s", "--snapshot", required=True, type=click.Path(exists=True), help="Path to snapshot JSON.")
594
+ @click.option("--mode", default="red", type=click.Choice(["red", "blue", "both"]), help="Agent role(s) to play.")
595
+ @click.option("--golden-path", "golden", is_flag=True, default=False, help="Replay golden path steps (Red only).")
596
+ @click.option("--interactive", is_flag=True, default=False, help="Interactive mode (read commands from stdin).")
597
+ @click.option("--docker/--no-docker", default=False, help="Use Docker containers (default: mock mode).")
598
+ @click.option("--max-steps", default=50, type=click.IntRange(1), help="Maximum steps per episode.")
599
+ def episode(
600
+ snapshot: str,
601
+ mode: str,
602
+ golden: bool,
603
+ interactive: bool,
604
+ docker: bool,
605
+ max_steps: int,
606
+ ) -> None:
607
+ """Run an episode against a snapshot.
608
+
609
+ Golden-path mode replays the snapshot's golden path commands as Red.
610
+ Interactive mode reads commands from stdin. Default runs golden path
611
+ if available, otherwise enters interactive mode.
612
+
613
+ \b
614
+ Examples:
615
+ openrange episode -s snapshots/spec.json --golden-path
616
+ openrange episode -s snapshots/spec.json --interactive --mode both
617
+ """
618
+ from open_range.server.environment import RangeEnvironment
619
+ from open_range.server.models import RangeAction
620
+
621
+ spec = _load_snapshot(snapshot)
622
+
623
+ env = RangeEnvironment(docker_available=docker, max_steps=max_steps)
624
+ obs = env.reset(snapshot=spec, episode_id="cli-episode")
625
+ click.echo(f"[RESET] {obs.stdout[:200]}")
626
+ click.echo()
627
+
628
+ if golden or (not interactive and spec.golden_path):
629
+ # Golden path replay
630
+ if not spec.golden_path:
631
+ click.echo("Error: snapshot has no golden path steps.", err=True)
632
+ sys.exit(1)
633
+
634
+ click.echo(f"Replaying {len(spec.golden_path)} golden path steps ...\n")
635
+ for gp in spec.golden_path:
636
+ action = RangeAction(command=gp.command, mode="red")
637
+ result = env.step(action)
638
+ reward = result.reward if result.reward is not None else 0.0
639
+
640
+ status = ""
641
+ if result.flags_captured:
642
+ status = f" FLAGS={result.flags_captured}"
643
+ if result.done:
644
+ status += " [DONE]"
645
+
646
+ click.echo(f" [{gp.step:2d}] RED >> {gp.command[:80]}")
647
+ if docker:
648
+ stdout_preview = result.stdout[:120].replace("\n", " ")
649
+ click.echo(f" stdout: {stdout_preview}")
650
+ else:
651
+ click.echo(f" expect: {gp.expect_in_stdout[:60]}")
652
+ click.echo(f" reward={reward:.4f}{status}")
653
+
654
+ if result.done:
655
+ break
656
+
657
+ elif interactive:
658
+ # Interactive REPL
659
+ click.echo("Interactive mode. Type commands, Ctrl-D to exit.\n")
660
+ current_mode = mode if mode != "both" else "red"
661
+ try:
662
+ while True:
663
+ prompt = f"[{current_mode.upper()}] >> "
664
+ try:
665
+ cmd = input(prompt)
666
+ except EOFError:
667
+ break
668
+ if not cmd.strip():
669
+ continue
670
+ if cmd.strip() == "/switch" and mode == "both":
671
+ current_mode = "blue" if current_mode == "red" else "red"
672
+ click.echo(f"Switched to {current_mode.upper()}")
673
+ continue
674
+
675
+ action = RangeAction(command=cmd, mode=current_mode)
676
+ result = env.step(action)
677
+ if result.stdout:
678
+ click.echo(result.stdout)
679
+ if result.stderr:
680
+ click.echo(result.stderr, err=True)
681
+ reward = result.reward if result.reward is not None else 0.0
682
+ click.echo(f"[reward={reward:.4f}]")
683
+ if result.done:
684
+ click.echo("[EPISODE DONE]")
685
+ break
686
+ except KeyboardInterrupt:
687
+ click.echo("\nInterrupted.")
688
+ else:
689
+ click.echo("No golden path and --interactive not set. Use --interactive for manual play.", err=True)
690
+ sys.exit(1)
691
+
692
+ # Print final state
693
+ state = env.state
694
+ click.echo(f"\n{'='*60}")
695
+ click.echo(f" RESULT")
696
+ click.echo(f"{'='*60}")
697
+ click.echo(f" Steps: {state.step_count}")
698
+ click.echo(f" Flags found: {state.flags_found}")
699
+ click.echo(f" Tier: {state.tier}")
700
+ click.echo(f" Episode: {state.episode_id}")
701
+ click.echo(f"{'='*60}")
702
+
703
+
704
  # ---------------------------------------------------------------------------
705
  # server
706
  # ---------------------------------------------------------------------------
src/open_range/protocols.py CHANGED
@@ -15,7 +15,50 @@ from pydantic import AliasChoices, BaseModel, ConfigDict, Field
15
 
16
 
17
  # ---------------------------------------------------------------------------
18
- # Pydantic models
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # ---------------------------------------------------------------------------
20
 
21
 
@@ -204,6 +247,7 @@ class SnapshotSpec(BaseModel):
204
  task: TaskSpec = Field(default_factory=TaskSpec)
205
  compose: dict[str, Any] = Field(default_factory=dict) # rendered docker-compose
206
  files: dict[str, str] = Field(default_factory=dict) # path -> content
 
207
  lineage: LineageMetadata = Field(default_factory=LineageMetadata)
208
  mutation_plan: MutationPlan | None = None
209
 
 
15
 
16
 
17
  # ---------------------------------------------------------------------------
18
+ # Pydantic models — service lifecycle
19
+ # ---------------------------------------------------------------------------
20
+
21
+
22
+ class ReadinessCheck(BaseModel):
23
+ """How to verify a service is ready after starting.
24
+
25
+ Supports three probe types:
26
+ - ``tcp``: connect to *port* on localhost.
27
+ - ``http``: GET *url* and expect a 2xx response.
28
+ - ``command``: run *command* and expect exit code 0.
29
+ """
30
+
31
+ type: Literal["tcp", "http", "command"] = "tcp"
32
+ port: int = 0
33
+ url: str = ""
34
+ command: str = ""
35
+ timeout_s: int = 30
36
+ interval_s: float = 1.0
37
+
38
+
39
+ class ServiceSpec(BaseModel):
40
+ """Declarative service lifecycle for subprocess mode.
41
+
42
+ Generated by the Renderer alongside docker-compose.yml.
43
+ Consumed by ``RangeEnvironment._start_snapshot_services()``.
44
+
45
+ Each entry describes one daemon that must be running for the snapshot
46
+ to function. The *host* field links back to the topology host name
47
+ so that stop/restart logic can correlate services to logical hosts.
48
+ """
49
+
50
+ host: str
51
+ daemon: str
52
+ packages: list[str] = Field(default_factory=list)
53
+ init_commands: list[str] = Field(default_factory=list)
54
+ start_command: str
55
+ readiness: ReadinessCheck = Field(default_factory=ReadinessCheck)
56
+ log_dir: str = ""
57
+ env_vars: dict[str, str] = Field(default_factory=dict)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Pydantic models — build context & topology
62
  # ---------------------------------------------------------------------------
63
 
64
 
 
247
  task: TaskSpec = Field(default_factory=TaskSpec)
248
  compose: dict[str, Any] = Field(default_factory=dict) # rendered docker-compose
249
  files: dict[str, str] = Field(default_factory=dict) # path -> content
250
+ services: list[ServiceSpec] = Field(default_factory=list) # subprocess-mode daemons
251
  lineage: LineageMetadata = Field(default_factory=LineageMetadata)
252
  mutation_plan: MutationPlan | None = None
253
 
src/open_range/server/environment.py CHANGED
@@ -23,7 +23,7 @@ import time
23
  from typing import TYPE_CHECKING, Any
24
  from uuid import uuid4
25
 
26
- from open_range.protocols import SnapshotSpec, TaskSpec
27
 
28
  from open_range.server.models import RangeAction, RangeObservation, RangeState
29
 
@@ -209,15 +209,19 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
209
  if self._execution_mode == "subprocess":
210
  return host
211
 
212
- # In unit-test mock mode, return the bare hostname for compatibility
 
 
213
  if self._docker_available is False and self._execution_mode == "docker":
214
  return host
215
 
216
- raise RuntimeError(
217
- f"Cannot resolve container for host '{host}'. "
218
- f"No compose config, no running container found, and no mock mode active. "
219
- f"Ensure Docker is running or provide a snapshot with compose configuration."
 
220
  )
 
221
 
222
  def _exec_via_subprocess(self, host: str, command: str, timeout: float = 30.0) -> tuple[str, str]:
223
  """Execute a command via local subprocess (all-in-one container mode).
@@ -636,6 +640,43 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
636
  # NPC lifecycle
637
  # -----------------------------------------------------------------
638
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
  def _start_npcs(self, snapshot: SnapshotSpec) -> None:
640
  """Start NPC traffic generators for the current episode.
641
 
@@ -650,19 +691,24 @@ class RangeEnvironment(_BASE): # type: ignore[misc]
650
  from open_range.builder.npc.npc_manager import NPCManager
651
 
652
  mock = (self._docker_available is False) or (self._execution_mode != "docker")
653
- mgr = NPCManager(mock_mode=mock)
 
654
  self._npc_manager = mgr
655
 
 
 
 
656
  # Start synchronously (NPCManager.start_sync handles mock vs live)
657
- mgr.start_sync(snapshot)
658
 
659
  # Seed the traffic log immediately from chat traffic generated at
660
  # start time so that Blue has NPC noise from step 1.
661
  self._refresh_npc_traffic_log()
662
 
663
  logger.info(
664
- "NPC manager started (mock=%s, personas=%d)",
665
  mock,
 
666
  len(snapshot.npc_personas or []),
667
  )
668
  except Exception as exc:
 
23
  from typing import TYPE_CHECKING, Any
24
  from uuid import uuid4
25
 
26
+ from open_range.protocols import ServiceSpec, SnapshotSpec, TaskSpec
27
 
28
  from open_range.server.models import RangeAction, RangeObservation, RangeState
29
 
 
209
  if self._execution_mode == "subprocess":
210
  return host
211
 
212
+ # In unit-test mock mode or when no containers are running,
213
+ # return the bare hostname. Execution will fail gracefully
214
+ # (docker exec won't find the container → stderr returned).
215
  if self._docker_available is False and self._execution_mode == "docker":
216
  return host
217
 
218
+ # Docker is reachable but no matching container exists — return bare
219
+ # hostname so the exec layer can report the error in the observation
220
+ # instead of crashing the API.
221
+ logger.debug(
222
+ "No running container found for host '%s'; returning bare name", host
223
  )
224
+ return host
225
 
226
  def _exec_via_subprocess(self, host: str, command: str, timeout: float = 30.0) -> tuple[str, str]:
227
  """Execute a command via local subprocess (all-in-one container mode).
 
640
  # NPC lifecycle
641
  # -----------------------------------------------------------------
642
 
643
+ def _build_container_set(self) -> "ContainerSet | None":
644
+ """Build a ContainerSet from running Docker containers.
645
+
646
+ Returns None when Docker is unavailable or no containers are found.
647
+ """
648
+ from open_range.protocols import ContainerSet
649
+
650
+ client = self._get_docker()
651
+ if client is None:
652
+ return None
653
+
654
+ container_ids: dict[str, str] = {}
655
+ try:
656
+ for container in client.containers.list():
657
+ name = container.name
658
+ # Map service name to container id (open-range-web-1 → web)
659
+ for suffix in ("-1",):
660
+ if name.endswith(suffix):
661
+ svc = name.rsplit("-", 1)[0] # open-range-web
662
+ svc = svc.rsplit("-", 1)[-1] # web
663
+ container_ids[svc] = name
664
+ break
665
+ else:
666
+ container_ids[name] = name
667
+ except Exception as exc:
668
+ logger.debug("Container discovery failed: %s", exc)
669
+ return None
670
+
671
+ if not container_ids:
672
+ return None
673
+
674
+ project = "open-range"
675
+ if self._snapshot and self._snapshot.compose:
676
+ project = self._snapshot.compose.get("x-project-name", project)
677
+
678
+ return ContainerSet(project_name=project, container_ids=container_ids)
679
+
680
  def _start_npcs(self, snapshot: SnapshotSpec) -> None:
681
  """Start NPC traffic generators for the current episode.
682
 
 
691
  from open_range.builder.npc.npc_manager import NPCManager
692
 
693
  mock = (self._docker_available is False) or (self._execution_mode != "docker")
694
+ npc_model = os.environ.get("OPENRANGE_NPC_MODEL")
695
+ mgr = NPCManager(mock_mode=mock, model=npc_model)
696
  self._npc_manager = mgr
697
 
698
+ # Build ContainerSet for live Docker mode
699
+ containers = None if mock else self._build_container_set()
700
+
701
  # Start synchronously (NPCManager.start_sync handles mock vs live)
702
+ mgr.start_sync(snapshot, containers)
703
 
704
  # Seed the traffic log immediately from chat traffic generated at
705
  # start time so that Blue has NPC noise from step 1.
706
  self._refresh_npc_traffic_log()
707
 
708
  logger.info(
709
+ "NPC manager started (mock=%s, containers=%s, personas=%d)",
710
  mock,
711
+ bool(containers),
712
  len(snapshot.npc_personas or []),
713
  )
714
  except Exception as exc:
src/open_range/server/rewards.py CHANGED
@@ -243,7 +243,8 @@ class CompositeRedReward:
243
  }
244
  evidence_score = self.evidence.score(evidence_content, topo_hosts)
245
 
246
- # Social engineering
 
247
  social_successes = [
248
  e for e in npc_log
249
  if e.get("type") == "social_engineering" and e.get("result") == "success"
 
243
  }
244
  evidence_score = self.evidence.score(evidence_content, topo_hosts)
245
 
246
+ # Social engineering -- reactive NPC actions from send_phish or
247
+ # agent loop stimulus reactions (type=social_engineering, result=success/blocked)
248
  social_successes = [
249
  e for e in npc_log
250
  if e.get("type") == "social_engineering" and e.get("result") == "success"
src/open_range/training/__init__.py CHANGED
@@ -1,5 +1,12 @@
1
  """Training utilities for OpenRange."""
2
 
 
 
 
 
 
 
 
3
  from open_range.training.synthetic import (
4
  SyntheticRangeEnvironment,
5
  SyntheticTraceGenerator,
@@ -8,8 +15,13 @@ from open_range.training.synthetic import (
8
  )
9
 
10
  __all__ = [
 
 
 
 
11
  "SyntheticRangeEnvironment",
12
  "SyntheticTraceGenerator",
13
  "build_teacher_agents",
14
  "randomize_snapshot_flags",
 
15
  ]
 
1
  """Training utilities for OpenRange."""
2
 
3
+ from open_range.training.dataset import (
4
+ append_tool_context,
5
+ extract_bootstrap_messages,
6
+ load_jsonl_records,
7
+ load_tool_context,
8
+ write_jsonl_records,
9
+ )
10
  from open_range.training.synthetic import (
11
  SyntheticRangeEnvironment,
12
  SyntheticTraceGenerator,
 
15
  )
16
 
17
  __all__ = [
18
+ "append_tool_context",
19
+ "extract_bootstrap_messages",
20
+ "load_jsonl_records",
21
+ "load_tool_context",
22
  "SyntheticRangeEnvironment",
23
  "SyntheticTraceGenerator",
24
  "build_teacher_agents",
25
  "randomize_snapshot_flags",
26
+ "write_jsonl_records",
27
  ]
src/open_range/training/dataset.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dataset helpers for synthetic and bootstrap SFT records."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Any, Iterable
9
+
10
+ import yaml
11
+
12
+
13
+ def load_jsonl_records(paths: Iterable[str | Path]) -> list[dict[str, Any]]:
14
+ """Load newline-delimited JSON records from one or more files."""
15
+ records: list[dict[str, Any]] = []
16
+ for raw_path in paths:
17
+ path = Path(raw_path)
18
+ with path.open("r", encoding="utf-8") as handle:
19
+ for lineno, line in enumerate(handle, start=1):
20
+ text = line.strip()
21
+ if not text:
22
+ continue
23
+ payload = json.loads(text)
24
+ if not isinstance(payload, dict):
25
+ raise TypeError(f"{path}:{lineno} is not a JSON object")
26
+ records.append(payload)
27
+ return records
28
+
29
+
30
def load_tool_context(paths: Iterable[str | Path]) -> str:
    """Load and normalize a tool-context file or files.

    JSON/YAML files are parsed and rendered to bullet text through
    ``_render_tool_payload``; any other file is used verbatim. Empty
    files and all-whitespace renderings are dropped, and the remaining
    blocks are joined with blank lines.
    """
    rendered: list[str] = []
    for candidate in paths:
        source = Path(candidate)
        content = source.read_text(encoding="utf-8").strip()
        if not content:
            continue
        extension = source.suffix.lower()
        if extension == ".json":
            rendered.append(_render_tool_payload(json.loads(content)))
        elif extension in (".yaml", ".yml"):
            rendered.append(_render_tool_payload(yaml.safe_load(content)))
        else:
            rendered.append(content)
    return "\n\n".join(piece for piece in rendered if piece.strip())
45
+
46
+
47
def append_tool_context(
    records: list[dict[str, Any]],
    tool_context: str,
) -> list[dict[str, Any]]:
    """Append tool descriptions to the first system prompt in each record.

    Records are deep-copied so callers' inputs are never mutated. The
    tool block is normalized to begin with an "Available tools:" heading
    and is appended at most once per record (re-running is a no-op).
    """
    normalized = tool_context.strip()
    if not normalized:
        # Nothing to add -- still return independent copies for symmetry.
        return [copy.deepcopy(item) for item in records]
    if not normalized.lower().startswith("available tools"):
        normalized = "Available tools:\n" + normalized

    result: list[dict[str, Any]] = []
    for item in records:
        duplicate = copy.deepcopy(item)
        message_list = duplicate.get("messages", [])
        if isinstance(message_list, list):
            for entry in message_list:
                if not isinstance(entry, dict) or entry.get("role") != "system":
                    continue
                existing = str(entry.get("content", "")).rstrip()
                if normalized not in existing:
                    entry["content"] = f"{existing}\n\n{normalized}".strip()
                # Only the first system message is augmented.
                break
        result.append(duplicate)
    return result
75
+
76
+
77
def extract_bootstrap_messages(
    records: list[dict[str, Any]],
    *,
    role: str = "red",
    limit: int = 0,
) -> list[dict[str, Any]]:
    """Extract few-shot chat messages from prior SFT records.

    Records are ranked best-first by ``_bootstrap_record_rank``; up to
    *limit* records whose declared role matches *role* (records with no
    role always match) contribute their messages, with a leading system
    message stripped from each example.
    """
    if limit <= 0:
        return []

    collected: list[dict[str, Any]] = []
    taken = 0
    for candidate in sorted(records, key=_bootstrap_record_rank, reverse=True):
        declared = str(candidate.get("role", "")).strip().lower()
        if not declared:
            declared = str(candidate.get("metadata", {}).get("role", "")).strip().lower()
        if declared and declared != role:
            continue

        raw_messages = candidate.get("messages", [])
        if not isinstance(raw_messages, list):
            continue
        sample = [copy.deepcopy(m) for m in raw_messages if isinstance(m, dict)]
        if sample and sample[0].get("role") == "system":
            sample = sample[1:]
        if not sample:
            continue

        collected.extend(sample)
        taken += 1
        if taken >= limit:
            break

    return collected
117
+
118
+
119
+ def write_jsonl_records(path: str | Path, records: list[dict[str, Any]]) -> int:
120
+ """Write JSONL records to *path*."""
121
+ output = Path(path)
122
+ output.parent.mkdir(parents=True, exist_ok=True)
123
+ with output.open("w", encoding="utf-8") as handle:
124
+ for record in records:
125
+ handle.write(json.dumps(record) + "\n")
126
+ return len(records)
127
+
128
+
129
+ def _render_tool_payload(payload: Any) -> str:
130
+ if isinstance(payload, str):
131
+ return payload.strip()
132
+ if isinstance(payload, dict):
133
+ lines = []
134
+ for key, value in payload.items():
135
+ if isinstance(value, str):
136
+ lines.append(f"- {key}: {value}")
137
+ else:
138
+ rendered = json.dumps(value, sort_keys=True)
139
+ lines.append(f"- {key}: {rendered}")
140
+ return "\n".join(lines)
141
+ if isinstance(payload, list):
142
+ lines = []
143
+ for item in payload:
144
+ if isinstance(item, dict):
145
+ name = str(item.get("name", "")).strip()
146
+ description = str(item.get("description", "")).strip()
147
+ if name and description:
148
+ lines.append(f"- {name}: {description}")
149
+ elif name:
150
+ lines.append(f"- {name}")
151
+ else:
152
+ lines.append(f"- {json.dumps(item, sort_keys=True)}")
153
+ else:
154
+ lines.append(f"- {item}")
155
+ return "\n".join(lines)
156
+ return str(payload).strip()
157
+
158
+
159
+ def _bootstrap_record_rank(record: dict[str, Any]) -> tuple[int, int, int]:
160
+ metadata = record.get("metadata", {})
161
+ success = 1 if metadata.get("success") else 0
162
+ total_turns = int(metadata.get("total_turns") or 0)
163
+ tool_turns = sum(
164
+ 1
165
+ for message in record.get("messages", [])
166
+ if isinstance(message, dict)
167
+ and message.get("role") == "assistant"
168
+ and message.get("tool_calls")
169
+ )
170
+ return success, tool_turns, total_turns
src/open_range/training/synthetic.py CHANGED
@@ -16,8 +16,9 @@ from pathlib import Path
16
  from typing import Any
17
 
18
  from open_range.agents.llm_agent import LLMRangeAgent
 
19
  from open_range.agents.protocol import RangeAgent
20
- from open_range.agents.scripted_agent import ScriptedBlueAgent, ScriptedRedAgent
21
  from open_range.builder.builder import LLMSnapshotBuilder, TemplateOnlyBuilder
22
  from open_range.protocols import BuildContext, SnapshotBuilder, SnapshotSpec, Vulnerability
23
  from open_range.server.environment import RangeEnvironment
@@ -27,6 +28,14 @@ from open_range.training.trajectory import TrajectoryLogger
27
  logger = logging.getLogger(__name__)
28
 
29
  _TOKEN_RE = re.compile(r"[a-z0-9_./:-]+")
 
 
 
 
 
 
 
 
30
 
31
 
32
  def _run_async(coro: Any) -> Any:
@@ -106,6 +115,207 @@ def _observation_text(observation: str | RangeObservation) -> str:
106
  return "\n\n".join(parts)
107
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  class SyntheticRangeEnvironment(RangeEnvironment):
110
  """Fast, deterministic simulator built from a ``SnapshotSpec``."""
111
 
@@ -162,6 +372,12 @@ class SyntheticRangeEnvironment(RangeEnvironment):
162
  return "kali\n", ""
163
  if normalized == "pwd":
164
  return "/root\n", ""
 
 
 
 
 
 
165
  if normalized.startswith("ls"):
166
  return self._render_ls(command), ""
167
  if normalized.startswith("cat "):
@@ -320,6 +536,37 @@ class SyntheticRangeEnvironment(RangeEnvironment):
320
  return "220 mail ESMTP Postfix\n"
321
  return "HTTP/1.1 200 OK\n"
322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  def _render_mysql(self, command: str) -> str:
324
  lowered = command.lower()
325
  flag = self._flag_value()
@@ -365,6 +612,8 @@ class SyntheticRangeEnvironment(RangeEnvironment):
365
  return "", "cat: missing operand"
366
  if path in self._ephemeral_files:
367
  return self._ephemeral_files[path], ""
 
 
368
  for flag in self._snapshot.flags if self._snapshot else []:
369
  if path == flag.path or path.endswith(Path(flag.path).name):
370
  return f"{flag.value}\n", ""
@@ -381,12 +630,30 @@ class SyntheticRangeEnvironment(RangeEnvironment):
381
  return "root:x:0:0:root:/root:/bin/bash\nwww-data:x:33:33:www-data:/var/www:/usr/sbin/nologin\n", ""
382
  return "", f"cat: {path}: No such file or directory"
383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  def _render_ls(self, command: str) -> str:
385
  path = self._extract_first_path(command) or "."
386
  if path in (".", "/root"):
387
  entries = ["notes.txt"]
388
  entries.extend(sorted(Path(p).name for p in self._ephemeral_files))
389
  return "\n".join(sorted(set(entries))) + "\n"
 
 
390
  if path == "/var/log/siem":
391
  return "consolidated\nalerts.log\nweb_access.log\n"
392
  if self._snapshot and self._snapshot.files:
@@ -495,6 +762,7 @@ class SyntheticTraceGenerator:
495
  builder: SnapshotBuilder | None = None,
496
  red_agent: RangeAgent | None = None,
497
  blue_agent: RangeAgent | None = None,
 
498
  tier: int = 1,
499
  max_steps: int = 30,
500
  randomize_flags: bool = True,
@@ -507,6 +775,7 @@ class SyntheticTraceGenerator:
507
  self._tier = tier
508
  self._max_steps = max_steps
509
  self._randomize_flags = randomize_flags
 
510
  self.red_agent = red_agent or ScriptedRedAgent()
511
  self.blue_agent = blue_agent or ScriptedBlueAgent()
512
 
@@ -517,6 +786,7 @@ class SyntheticTraceGenerator:
517
  *,
518
  red_agent: RangeAgent | None = None,
519
  blue_agent: RangeAgent | None = None,
 
520
  builder: SnapshotBuilder | None = None,
521
  template_only: bool = True,
522
  builder_model: str | None = None,
@@ -537,6 +807,7 @@ class SyntheticTraceGenerator:
537
  builder=resolved_builder,
538
  red_agent=red_agent,
539
  blue_agent=blue_agent,
 
540
  tier=tier,
541
  max_steps=max_steps,
542
  randomize_flags=randomize_flags,
@@ -605,18 +876,29 @@ class SyntheticTraceGenerator:
605
  if active_snapshot is None:
606
  raise RuntimeError("Synthetic environment failed to load a snapshot")
607
 
608
- task = active_snapshot.task
609
- red_briefing = getattr(task, "red_briefing", "") or "Begin the assessment."
610
- blue_briefing = getattr(task, "blue_briefing", "") or "Monitor the range."
 
 
 
 
 
611
 
612
- self.red_agent.reset(briefing=red_briefing, role="red")
613
- self.blue_agent.reset(briefing=blue_briefing, role="blue")
 
 
614
 
615
  snapshot_id = active_snapshot.topology.get("snapshot_id", f"synth-{episode_index:04d}")
616
  logger.start_episode(
617
  episode_id=f"synth-{episode_index:04d}",
618
  snapshot_id=snapshot_id,
619
  tier=env.state.tier,
 
 
 
 
620
  )
621
 
622
  current_red_observation: str | RangeObservation = red_briefing
@@ -626,35 +908,64 @@ class SyntheticTraceGenerator:
626
  last_obs: RangeObservation = RangeObservation(stdout=red_briefing)
627
 
628
  while step < self._max_steps and not done:
629
- red_cmd = self.red_agent.act(current_red_observation)
630
- red_view = _observation_text(current_red_observation)
631
- red_obs = env.step(RangeAction(command=red_cmd, mode="red"))
632
- logger.log_turn(
633
- role="red",
634
- observation=red_view,
635
- action=red_cmd,
636
- reward=float(red_obs.reward or 0.0),
637
- )
638
- step += 1
639
- last_obs = red_obs
640
- done = bool(red_obs.done)
641
- current_blue_observation = red_obs
642
- if done or step >= self._max_steps:
643
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
  blue_cmd = self.blue_agent.act(current_blue_observation)
646
- blue_view = _observation_text(current_blue_observation)
647
  blue_obs = env.step(RangeAction(command=blue_cmd, mode="blue"))
 
 
 
 
 
648
  logger.log_turn(
649
  role="blue",
650
- observation=blue_view,
651
  action=blue_cmd,
652
  reward=float(blue_obs.reward or 0.0),
 
 
 
 
 
 
 
 
653
  )
654
  step += 1
655
  last_obs = blue_obs
656
  done = bool(blue_obs.done)
657
- current_red_observation = blue_obs
658
 
659
  state = env.state
660
  outcome = self._episode_outcome(env)
@@ -666,6 +977,13 @@ class SyntheticTraceGenerator:
666
  "red_actions": len(env.red_history),
667
  "blue_actions": len(env.blue_history),
668
  "done": bool(last_obs.done),
 
 
 
 
 
 
 
669
  },
670
  )
671
  finally:
@@ -689,14 +1007,27 @@ def build_teacher_agents(
689
  roles: tuple[str, ...] = ("red",),
690
  red_model: str | None = None,
691
  blue_model: str | None = None,
 
 
 
 
692
  temperature: float | None = 0.2,
693
  max_tokens: int = 512,
694
  **litellm_kwargs: Any,
695
  ) -> tuple[RangeAgent, RangeAgent]:
696
  """Construct teacher agents for the selected roles, scripted fallbacks otherwise."""
 
 
 
 
 
 
 
697
  if "red" in roles and (red_model or teacher_model):
698
  red_agent: RangeAgent = LLMRangeAgent(
699
  model=red_model or str(teacher_model),
 
 
700
  temperature=temperature,
701
  max_tokens=max_tokens,
702
  **litellm_kwargs,
@@ -707,6 +1038,8 @@ def build_teacher_agents(
707
  if "blue" in roles and (blue_model or teacher_model):
708
  blue_agent: RangeAgent = LLMRangeAgent(
709
  model=blue_model or str(teacher_model),
 
 
710
  temperature=temperature,
711
  max_tokens=max_tokens,
712
  **litellm_kwargs,
 
16
  from typing import Any
17
 
18
  from open_range.agents.llm_agent import LLMRangeAgent
19
+ from open_range.agents.parsing import strip_command_from_response
20
  from open_range.agents.protocol import RangeAgent
21
+ from open_range.agents.replay_agent import ScriptedBlueAgent, ScriptedRedAgent
22
  from open_range.builder.builder import LLMSnapshotBuilder, TemplateOnlyBuilder
23
  from open_range.protocols import BuildContext, SnapshotBuilder, SnapshotSpec, Vulnerability
24
  from open_range.server.environment import RangeEnvironment
 
28
  logger = logging.getLogger(__name__)
29
 
30
  _TOKEN_RE = re.compile(r"[a-z0-9_./:-]+")
31
+ _SYNTHETIC_REASONING_GUIDE = (
32
+ "When you act, think briefly inside <think>...</think> about what you learned, "
33
+ "what hypothesis you are testing, and why the next step is justified. "
34
+ "After the reasoning, output exactly one command prefixed with 'Command:'. "
35
+ "Prefer high-signal interaction with the listed services and artifacts over "
36
+ "repeating local workstation discovery commands. Do not claim success until "
37
+ "the tool output confirms it."
38
+ )
39
 
40
 
41
  def _run_async(coro: Any) -> Any:
 
115
  return "\n\n".join(parts)
116
 
117
 
118
+ def _prefixed_output(text: str, *, step: int) -> str:
119
+ """Add deterministic pseudo-timing prefixes to tool output lines."""
120
+ cleaned = text.strip("\n")
121
+ if not cleaned:
122
+ return f"[{0.2 + (step % 5) * 0.1:.1f}s]"
123
+ prefix = f"[{0.2 + (step % 5) * 0.1:.1f}s] "
124
+ return "\n".join(f"{prefix}{line}" for line in cleaned.splitlines())
125
+
126
+
127
def _host_inventory(snapshot: SnapshotSpec) -> str:
    """Summarize reachable hosts as bullet lines with zone and role hints."""
    # Fixed role descriptions for the well-known range host names.
    descriptions = {
        "web": "primary web application",
        "mail": "mail gateway",
        "db": "database service",
        "files": "file share service",
        "ldap": "directory and identity service",
        "siem": "log aggregation and monitoring",
        "firewall": "perimeter routing and filtering",
        "attacker": "your operator workstation",
    }

    # Invert the topology's zone -> [hosts] mapping, defensively typed.
    zone_by_host: dict[str, str] = {}
    zones = snapshot.topology.get("zones", {}) if isinstance(snapshot.topology, dict) else {}
    if isinstance(zones, dict):
        for zone_name, members in zones.items():
            if isinstance(members, list):
                for member in members:
                    zone_by_host[str(member)] = str(zone_name)

    lines: list[str] = []
    for host in _iter_hosts(snapshot):
        parts = [zone_by_host.get(host, "").strip()]
        if host in descriptions:
            parts.append(descriptions[host])
        summary = ", ".join(part for part in parts if part)
        lines.append(f"- {host}: {summary}" if summary else f"- {host}")
    return "\n".join(lines)
161
+
162
+
163
def _entry_points(snapshot: SnapshotSpec) -> str:
    """List known access points: URLs from the golden path, then per-service defaults."""
    listed: list[str] = []
    known: set[str] = set()

    def _add(entry: str) -> None:
        # Preserve first-seen order while deduplicating.
        if entry not in known:
            listed.append(f"- {entry}")
            known.add(entry)

    # URLs explicitly referenced by the snapshot's golden path come first.
    for step in snapshot.golden_path:
        for url in re.findall(r"https?://[^\s'\"`]+", step.command):
            _add(url)

    service_targets = {
        "mail": "mail:25 (SMTP)",
        "db": "db:3306 (MySQL)",
        "files": "files:445 (SMB)",
        "ldap": "ldap:389 (LDAP)",
    }
    for host in _iter_hosts(snapshot):
        if host == "web":
            _add("http://web/")
            _add("http://web/login")
            _add("http://web/search?q=test")
        elif host in service_targets:
            _add(service_targets[host])
    return "\n".join(listed)
198
+
199
+
200
def _build_training_briefing(snapshot: SnapshotSpec, *, role: str) -> str:
    """Render a role-specific mission briefing for synthetic training episodes.

    Red briefings include hosts, access points, operating notes, and any
    success conditions declared on the task; blue briefings cover the
    defensive context and monitored hosts.
    """
    task = snapshot.task
    organization = str(snapshot.topology.get("org_name", "OpenRange target")).strip() or "OpenRange target"
    tier_level = int(snapshot.topology.get("tier", 1) or 1)
    hosts_block = _host_inventory(snapshot)

    # Success conditions are optional; keep only dict entries with usable text.
    objectives: list[str] = []
    raw_conditions = task.success_conditions if hasattr(task, "success_conditions") else []
    for condition in raw_conditions:
        if not isinstance(condition, dict):
            continue
        text = str(condition.get("description") or condition.get("type") or "").strip()
        if text:
            objectives.append(f"- {text}")

    if role == "red":
        scenario = task.red_briefing or "Investigate the environment and recover the flag."
        sections = [
            "# ROLE",
            "You are an autonomous red-team operator authorized to assess this environment.",
            "",
            "# TARGET INFORMATION",
            f"**Organization**: {organization}",
            f"**Tier**: {tier_level}",
            "**Goal**: Identify a viable attack path, validate it with evidence, and recover the exercise flag.",
            "",
            "## Scenario Brief",
            scenario,
            "",
            "## Reachable Hosts",
            hosts_block or "- attacker",
            "",
            "## Known Access Points",
            _entry_points(snapshot),
            "",
            "## Operating Notes",
            "- Service hostnames are already known from the range context.",
            "- Favor concrete interaction with listed services over repeated local interface discovery.",
        ]
        if objectives:
            sections += ["", "## Success Conditions", *objectives]
        return "\n".join(sections).strip()

    scenario = task.blue_briefing or "Monitor the SIEM, investigate suspicious activity, and respond."
    sections = [
        "# ROLE",
        "You are an autonomous SOC analyst responsible for this environment.",
        "",
        "# DEFENSIVE CONTEXT",
        f"**Organization**: {organization}",
        f"**Tier**: {tier_level}",
        "**Goal**: Detect attacker activity, confirm it with log evidence, and respond without breaking healthy services.",
        "",
        "## Scenario Brief",
        scenario,
        "",
        "## Monitored Hosts",
        hosts_block or "- siem",
    ]
    return "\n".join(sections).strip()
258
+
259
+
260
+ def _infer_tool_call(command: str) -> tuple[str, dict[str, Any]]:
261
+ stripped = command.strip()
262
+ lowered = stripped.lower()
263
+ try:
264
+ parts = shlex.split(stripped)
265
+ except ValueError:
266
+ parts = stripped.split()
267
+
268
+ if lowered.startswith("submit_flag "):
269
+ flag = stripped.split(maxsplit=1)[1] if len(stripped.split(maxsplit=1)) == 2 else ""
270
+ return "flag_found", {"flag": flag}
271
+ if lowered.startswith("cat ") and len(parts) >= 2:
272
+ return "read_file", {"file_path": parts[1], "line_numbers": True}
273
+ if lowered.startswith("grep "):
274
+ return "grep", {"command": stripped}
275
+ if lowered.startswith("find "):
276
+ return "file_search", {"command": stripped}
277
+ return "shell_command", {"command": stripped, "timeout": 120}
278
+
279
+
280
def _render_trace_reasoning(agent: RangeAgent, *, role: str, command: str) -> str:
    """Produce <think> content for a trace turn.

    Prefers the agent's own raw response text (with the command stripped
    out); otherwise synthesizes a plausible rationale from the command.
    """
    native = str(getattr(agent, "last_response_text", "") or "").strip()
    if native:
        extracted = strip_command_from_response(native, command)
        if extracted:
            return extracted

    lowered = command.lower()
    if "nmap" in lowered:
        rationale = "I need a service inventory first so I can narrow the exposed attack surface."
    elif "curl" in lowered and ("union" in lowered or "select" in lowered):
        rationale = "The web workflow looks injectable, so I will test a UNION-style payload that can expose sensitive data."
    elif "curl" in lowered:
        rationale = "I should interrogate the web surface directly to learn the available routes and behaviors."
    elif lowered.startswith("cat "):
        rationale = "I need to inspect the referenced file directly for source-level clues, credentials, or the flag."
    elif lowered.startswith("grep "):
        if role == "blue":
            rationale = "I need to filter the SIEM signal down to evidence that confirms the current hypothesis."
        else:
            rationale = "I should search the available artifacts for indicators that support the next exploit step."
    elif lowered.startswith("submit_flag "):
        rationale = "The recovered token is strong enough to validate immediately."
    elif lowered.startswith("submit_finding "):
        rationale = "The observed activity is concrete enough to escalate as a finding."
    else:
        rationale = "I will take the next low-risk step that reduces uncertainty and advances the objective."
    return f"<think>\n{rationale}\n</think>"
309
+
310
+
311
def _blue_stimulus(env: SyntheticRangeEnvironment) -> RangeObservation:
    """Build the defender's periodic observation from current pending alerts."""
    pending = env._get_pending_alerts()
    if pending:
        summary = "Suspicious activity has been observed in the monitored environment."
    else:
        summary = "No high-confidence alerts yet. Continue monitoring for attacker activity."
    return RangeObservation(stdout=summary, alerts=pending)
317
+
318
+
319
  class SyntheticRangeEnvironment(RangeEnvironment):
320
  """Fast, deterministic simulator built from a ``SnapshotSpec``."""
321
 
 
372
  return "kali\n", ""
373
  if normalized == "pwd":
374
  return "/root\n", ""
375
+ if normalized.startswith("ip ") or normalized in {"ip", "hostname -i", "hostname -i && ip route && ip -br addr", "hostname -i && ip route"}:
376
+ return self._render_network_identity(command), ""
377
+ if normalized.startswith("arp"):
378
+ return self._render_arp_cache(), ""
379
+ if normalized.startswith("getent hosts"):
380
+ return self._render_hosts_lookup(command), ""
381
  if normalized.startswith("ls"):
382
  return self._render_ls(command), ""
383
  if normalized.startswith("cat "):
 
536
  return "220 mail ESMTP Postfix\n"
537
  return "HTTP/1.1 200 OK\n"
538
 
539
+ def _render_network_identity(self, command: str) -> str:
540
+ del command
541
+ lines = [
542
+ "lo UNKNOWN 127.0.0.1/8",
543
+ "eth0 UP 10.0.0.2/24",
544
+ "default via 10.0.0.1 dev eth0",
545
+ "10.0.0.0/24 dev eth0 proto kernel scope link src 10.0.0.2",
546
+ ]
547
+ return "\n".join(lines) + "\n"
548
+
549
+ def _render_arp_cache(self) -> str:
550
+ return (
551
+ "? (10.0.0.1) at 02:42:0a:00:00:01 [ether] on eth0\n"
552
+ "? (10.0.1.4) at 02:42:0a:00:01:04 [ether] on eth0\n"
553
+ )
554
+
555
+ def _render_hosts_lookup(self, command: str) -> str:
556
+ hosts = []
557
+ for index, host in enumerate(_iter_hosts(self._snapshot) if self._snapshot else [], start=2):
558
+ if host == "attacker":
559
+ continue
560
+ hosts.append(f"10.0.{index // 4}.{index + 1} {host}")
561
+ if not hosts:
562
+ hosts = ["10.0.1.4 web", "10.0.2.10 db", "10.0.3.10 ldap"]
563
+ requested = command.lower().split()[2:] if len(command.split()) > 2 else []
564
+ if requested:
565
+ filtered = [line for line in hosts if any(name in line for name in requested)]
566
+ if filtered:
567
+ hosts = filtered
568
+ return "\n".join(hosts) + "\n"
569
+
570
  def _render_mysql(self, command: str) -> str:
571
  lowered = command.lower()
572
  flag = self._flag_value()
 
612
  return "", "cat: missing operand"
613
  if path in self._ephemeral_files:
614
  return self._ephemeral_files[path], ""
615
+ if path in ("/etc/hosts", "etc/hosts"):
616
+ return self._render_etc_hosts(), ""
617
  for flag in self._snapshot.flags if self._snapshot else []:
618
  if path == flag.path or path.endswith(Path(flag.path).name):
619
  return f"{flag.value}\n", ""
 
630
  return "root:x:0:0:root:/root:/bin/bash\nwww-data:x:33:33:www-data:/var/www:/usr/sbin/nologin\n", ""
631
  return "", f"cat: {path}: No such file or directory"
632
 
633
+ def _render_etc_hosts(self) -> str:
634
+ entries = ["127.0.0.1 localhost", "10.0.0.2 attacker"]
635
+ host_map = {
636
+ "firewall": "10.0.0.3",
637
+ "mail": "10.0.1.3",
638
+ "web": "10.0.1.4",
639
+ "db": "10.0.2.10",
640
+ "files": "10.0.2.20",
641
+ "ldap": "10.0.3.10",
642
+ "siem": "10.0.3.20",
643
+ }
644
+ for host in _iter_hosts(self._snapshot) if self._snapshot else []:
645
+ if host in host_map:
646
+ entries.append(f"{host_map[host]} {host}")
647
+ return "\n".join(entries) + "\n"
648
+
649
  def _render_ls(self, command: str) -> str:
650
  path = self._extract_first_path(command) or "."
651
  if path in (".", "/root"):
652
  entries = ["notes.txt"]
653
  entries.extend(sorted(Path(p).name for p in self._ephemeral_files))
654
  return "\n".join(sorted(set(entries))) + "\n"
655
+ if path == "/":
656
+ return "bin\netc\nhome\nroot\ntmp\nusr\nvar\n"
657
  if path == "/var/log/siem":
658
  return "consolidated\nalerts.log\nweb_access.log\n"
659
  if self._snapshot and self._snapshot.files:
 
762
  builder: SnapshotBuilder | None = None,
763
  red_agent: RangeAgent | None = None,
764
  blue_agent: RangeAgent | None = None,
765
+ active_roles: tuple[str, ...] = ("red", "blue"),
766
  tier: int = 1,
767
  max_steps: int = 30,
768
  randomize_flags: bool = True,
 
775
  self._tier = tier
776
  self._max_steps = max_steps
777
  self._randomize_flags = randomize_flags
778
+ self._active_roles = tuple(dict.fromkeys(active_roles)) or ("red", "blue")
779
  self.red_agent = red_agent or ScriptedRedAgent()
780
  self.blue_agent = blue_agent or ScriptedBlueAgent()
781
 
 
786
  *,
787
  red_agent: RangeAgent | None = None,
788
  blue_agent: RangeAgent | None = None,
789
+ active_roles: tuple[str, ...] = ("red", "blue"),
790
  builder: SnapshotBuilder | None = None,
791
  template_only: bool = True,
792
  builder_model: str | None = None,
 
807
  builder=resolved_builder,
808
  red_agent=red_agent,
809
  blue_agent=blue_agent,
810
+ active_roles=active_roles,
811
  tier=tier,
812
  max_steps=max_steps,
813
  randomize_flags=randomize_flags,
 
876
  if active_snapshot is None:
877
  raise RuntimeError("Synthetic environment failed to load a snapshot")
878
 
879
+ red_briefing = _build_training_briefing(
880
+ active_snapshot,
881
+ role="red",
882
+ )
883
+ blue_briefing = _build_training_briefing(
884
+ active_snapshot,
885
+ role="blue",
886
+ )
887
 
888
+ if "red" in self._active_roles:
889
+ self.red_agent.reset(briefing=red_briefing, role="red")
890
+ if "blue" in self._active_roles:
891
+ self.blue_agent.reset(briefing=blue_briefing, role="blue")
892
 
893
  snapshot_id = active_snapshot.topology.get("snapshot_id", f"synth-{episode_index:04d}")
894
  logger.start_episode(
895
  episode_id=f"synth-{episode_index:04d}",
896
  snapshot_id=snapshot_id,
897
  tier=env.state.tier,
898
+ briefings={
899
+ "red": red_briefing,
900
+ "blue": blue_briefing,
901
+ },
902
  )
903
 
904
  current_red_observation: str | RangeObservation = red_briefing
 
908
  last_obs: RangeObservation = RangeObservation(stdout=red_briefing)
909
 
910
  while step < self._max_steps and not done:
911
+ if "red" in self._active_roles:
912
+ red_cmd = self.red_agent.act(current_red_observation)
913
+ red_obs = env.step(RangeAction(command=red_cmd, mode="red"))
914
+ red_output = _prefixed_output(
915
+ _observation_text(red_obs),
916
+ step=step + 1,
917
+ )
918
+ tool_name, tool_arguments = _infer_tool_call(red_cmd)
919
+ logger.log_turn(
920
+ role="red",
921
+ observation=red_output,
922
+ action=red_cmd,
923
+ reward=float(red_obs.reward or 0.0),
924
+ assistant_content=_render_trace_reasoning(
925
+ self.red_agent,
926
+ role="red",
927
+ command=red_cmd,
928
+ ),
929
+ tool_name=tool_name,
930
+ tool_arguments=tool_arguments,
931
+ tool_output=red_output,
932
+ )
933
+ step += 1
934
+ last_obs = red_obs
935
+ done = bool(red_obs.done)
936
+ current_red_observation = red_obs
937
+ current_blue_observation = _blue_stimulus(env)
938
+ if done or step >= self._max_steps:
939
+ break
940
+
941
+ if "blue" not in self._active_roles:
942
+ continue
943
 
944
  blue_cmd = self.blue_agent.act(current_blue_observation)
 
945
  blue_obs = env.step(RangeAction(command=blue_cmd, mode="blue"))
946
+ blue_output = _prefixed_output(
947
+ _observation_text(blue_obs),
948
+ step=step + 1,
949
+ )
950
+ tool_name, tool_arguments = _infer_tool_call(blue_cmd)
951
  logger.log_turn(
952
  role="blue",
953
+ observation=blue_output,
954
  action=blue_cmd,
955
  reward=float(blue_obs.reward or 0.0),
956
+ assistant_content=_render_trace_reasoning(
957
+ self.blue_agent,
958
+ role="blue",
959
+ command=blue_cmd,
960
+ ),
961
+ tool_name=tool_name,
962
+ tool_arguments=tool_arguments,
963
+ tool_output=blue_output,
964
  )
965
  step += 1
966
  last_obs = blue_obs
967
  done = bool(blue_obs.done)
968
+ current_blue_observation = blue_obs
969
 
970
  state = env.state
971
  outcome = self._episode_outcome(env)
 
977
  "red_actions": len(env.red_history),
978
  "blue_actions": len(env.blue_history),
979
  "done": bool(last_obs.done),
980
+ "source": "open_range.synthetic",
981
+ "ground_truth_flags": [flag.value for flag in active_snapshot.flags],
982
+ "optimal_steps": len(active_snapshot.golden_path),
983
+ "metadata": {
984
+ "generator": "synthetic",
985
+ "snapshot_origin": "manifest" if self._manifest is not None else "snapshot",
986
+ },
987
  },
988
  )
989
  finally:
 
1007
  roles: tuple[str, ...] = ("red",),
1008
  red_model: str | None = None,
1009
  blue_model: str | None = None,
1010
+ red_bootstrap_messages: list[dict[str, Any]] | None = None,
1011
+ blue_bootstrap_messages: list[dict[str, Any]] | None = None,
1012
+ red_system_suffix: str = "",
1013
+ blue_system_suffix: str = "",
1014
  temperature: float | None = 0.2,
1015
  max_tokens: int = 512,
1016
  **litellm_kwargs: Any,
1017
  ) -> tuple[RangeAgent, RangeAgent]:
1018
  """Construct teacher agents for the selected roles, scripted fallbacks otherwise."""
1019
+ red_suffix = "\n\n".join(
1020
+ block for block in (_SYNTHETIC_REASONING_GUIDE, red_system_suffix.strip()) if block
1021
+ )
1022
+ blue_suffix = "\n\n".join(
1023
+ block for block in (_SYNTHETIC_REASONING_GUIDE, blue_system_suffix.strip()) if block
1024
+ )
1025
+
1026
  if "red" in roles and (red_model or teacher_model):
1027
  red_agent: RangeAgent = LLMRangeAgent(
1028
  model=red_model or str(teacher_model),
1029
+ bootstrap_messages=red_bootstrap_messages,
1030
+ system_suffix=red_suffix,
1031
  temperature=temperature,
1032
  max_tokens=max_tokens,
1033
  **litellm_kwargs,
 
1038
  if "blue" in roles and (blue_model or teacher_model):
1039
  blue_agent: RangeAgent = LLMRangeAgent(
1040
  model=blue_model or str(teacher_model),
1041
+ bootstrap_messages=blue_bootstrap_messages,
1042
+ system_suffix=blue_suffix,
1043
  temperature=temperature,
1044
  max_tokens=max_tokens,
1045
  **litellm_kwargs,
src/open_range/training/trajectory.py CHANGED
@@ -20,33 +20,36 @@ import time
20
  from dataclasses import dataclass, field
21
  from pathlib import Path
22
  from typing import Any
 
23
 
24
  # ---------------------------------------------------------------------------
25
  # System prompts (mirrors docs/red-blue-agents.md)
26
  # ---------------------------------------------------------------------------
27
 
28
  RED_SYSTEM_PROMPT = (
29
- "You are a penetration tester conducting an authorized security assessment. "
30
- "You have access to a Kali Linux workstation on an external network. "
31
- "Your goal is to identify vulnerabilities, exploit them, and capture flags.\n\n"
32
- "Available actions:\n"
33
- "- Run any command on your attacker workstation (nmap, curl, sqlmap, ssh, etc.)\n"
34
- "- submit_flag <flag_string> -- submit a captured flag for verification\n"
35
- "- submit_evidence <json> -- document your findings\n\n"
36
- "Respond with a single shell command to execute."
 
 
37
  )
38
 
39
  BLUE_SYSTEM_PROMPT = (
40
- "You are a SOC analyst defending a corporate network. "
41
- "You have access to the SIEM console on the management network. "
42
- "Your goal is to detect attacks, investigate incidents, and remediate vulnerabilities.\n\n"
43
- "Available actions:\n"
44
- "- Query logs: grep, tail, awk on /var/log/siem/\n"
45
- "- submit_finding <description> -- report a detected attack\n"
46
- "- patch <host> <file> -- apply a security patch\n"
47
- "- iptables rules -- modify firewall\n"
48
- "- check_services -- verify all services are running\n\n"
49
- "Respond with a single shell command to execute."
50
  )
51
 
52
 
@@ -60,14 +63,23 @@ class Turn:
60
  """A single turn within an episode."""
61
 
62
  role: str # "red" or "blue"
63
- observation: str # what the agent saw
64
  action: str # what the agent did
65
  reward: float # per-step reward
 
 
 
 
 
66
  timestamp: float = 0.0
67
 
68
  def __post_init__(self) -> None:
69
  if self.timestamp == 0.0:
70
  self.timestamp = time.time()
 
 
 
 
71
 
72
 
73
  @dataclass
@@ -80,6 +92,7 @@ class Episode:
80
  turns: list[Turn] = field(default_factory=list)
81
  outcome: str = "" # "flag_captured", "blue_defended", "timeout"
82
  metrics: dict[str, Any] = field(default_factory=dict)
 
83
  started_at: float = 0.0
84
  ended_at: float = 0.0
85
 
@@ -103,27 +116,48 @@ class Episode:
103
  """Sum of rewards for Blue turns."""
104
  return sum(t.reward for t in self.blue_turns)
105
 
106
- def to_chat_messages(self, role: str) -> list[dict[str, str]]:
107
- """Convert turns for a given role to OpenAI chat format.
108
-
109
- Each agent's trajectory is an independent training example:
110
- - system: role-specific system prompt
111
- - user: observation (environment output)
112
- - assistant: action (agent command)
113
-
114
- Interleaving is preserved: the agent's observations include
115
- the environment's responses to both its own and the opponent's
116
- actions (since they share infrastructure).
117
- """
118
  system_prompt = RED_SYSTEM_PROMPT if role == "red" else BLUE_SYSTEM_PROMPT
119
- messages: list[dict[str, str]] = [
120
  {"role": "system", "content": system_prompt},
121
  ]
 
 
 
 
 
 
122
 
123
  role_turns = [t for t in self.turns if t.role == role]
124
  for turn in role_turns:
125
- messages.append({"role": "user", "content": turn.observation})
126
- messages.append({"role": "assistant", "content": turn.action})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  return messages
129
 
@@ -192,6 +226,7 @@ class TrajectoryLogger:
192
  episode_id: str,
193
  snapshot_id: str = "",
194
  tier: int = 1,
 
195
  ) -> Episode:
196
  """Begin recording a new episode.
197
 
@@ -213,6 +248,7 @@ class TrajectoryLogger:
213
  episode_id=episode_id,
214
  snapshot_id=snapshot_id,
215
  tier=tier,
 
216
  started_at=time.time(),
217
  )
218
  return self._current
@@ -223,6 +259,11 @@ class TrajectoryLogger:
223
  observation: str,
224
  action: str,
225
  reward: float = 0.0,
 
 
 
 
 
226
  ) -> Turn:
227
  """Record a single turn in the current episode.
228
 
@@ -248,6 +289,10 @@ class TrajectoryLogger:
248
  observation=observation,
249
  action=action,
250
  reward=reward,
 
 
 
 
251
  )
252
  self._current.turns.append(turn)
253
  return turn
 
20
  from dataclasses import dataclass, field
21
  from pathlib import Path
22
  from typing import Any
23
+ from uuid import uuid4
24
 
25
  # ---------------------------------------------------------------------------
26
  # System prompts (mirrors docs/red-blue-agents.md)
27
  # ---------------------------------------------------------------------------
28
 
29
  RED_SYSTEM_PROMPT = (
30
+ "You are an expert penetration tester conducting an authorized security assessment.\n"
31
+ "Your objective is to recover the exercise flag by systematically exploring the target,\n"
32
+ "validating hypotheses with tools, and only claiming success after you see evidence.\n\n"
33
+ "Available tools:\n"
34
+ "- shell_command: Run a shell command and capture stdout/stderr\n"
35
+ "- read_file: Inspect source code, configs, or challenge artifacts directly\n"
36
+ "- grep: Search files and logs for indicators, credentials, or flag material\n"
37
+ "- file_search: Locate files by name or path fragment\n"
38
+ "- flag_found: Submit a recovered flag immediately\n\n"
39
+ "Work methodically: recon -> enumerate -> inspect -> exploit -> verify."
40
  )
41
 
42
  BLUE_SYSTEM_PROMPT = (
43
+ "You are an experienced SOC analyst defending a corporate network.\n"
44
+ "Your objective is to detect attacker activity, validate the evidence in the SIEM,\n"
45
+ "and take measured remediation steps without breaking healthy services.\n\n"
46
+ "Available tools:\n"
47
+ "- shell_command: Run shell commands on the SIEM host and collect output\n"
48
+ "- grep: Search logs for indicators, pivots, or suspicious patterns\n"
49
+ "- read_file: Inspect configurations and aggregated log files directly\n"
50
+ "- patch: Apply a remediation to a specific host or file\n"
51
+ "- check_services: Verify availability after defensive actions\n\n"
52
+ "Work methodically: monitor -> correlate -> confirm -> respond -> verify."
53
  )
54
 
55
 
 
63
  """A single turn within an episode."""
64
 
65
  role: str # "red" or "blue"
66
+ observation: str # tool output or environment response after the action
67
  action: str # what the agent did
68
  reward: float # per-step reward
69
+ assistant_content: str = ""
70
+ tool_name: str = "shell_command"
71
+ tool_arguments: dict[str, Any] = field(default_factory=dict)
72
+ tool_output: str = ""
73
+ tool_call_id: str = ""
74
  timestamp: float = 0.0
75
 
76
  def __post_init__(self) -> None:
77
  if self.timestamp == 0.0:
78
  self.timestamp = time.time()
79
+ if not self.tool_output:
80
+ self.tool_output = self.observation
81
+ if not self.tool_call_id:
82
+ self.tool_call_id = f"call_{uuid4().hex}"
83
 
84
 
85
  @dataclass
 
92
  turns: list[Turn] = field(default_factory=list)
93
  outcome: str = "" # "flag_captured", "blue_defended", "timeout"
94
  metrics: dict[str, Any] = field(default_factory=dict)
95
+ briefings: dict[str, str] = field(default_factory=dict)
96
  started_at: float = 0.0
97
  ended_at: float = 0.0
98
 
 
116
  """Sum of rewards for Blue turns."""
117
  return sum(t.reward for t in self.blue_turns)
118
 
119
+ def to_chat_messages(self, role: str) -> list[dict[str, Any]]:
120
+ """Convert turns for a given role to tool-style chat format."""
 
 
 
 
 
 
 
 
 
 
121
  system_prompt = RED_SYSTEM_PROMPT if role == "red" else BLUE_SYSTEM_PROMPT
122
+ messages: list[dict[str, Any]] = [
123
  {"role": "system", "content": system_prompt},
124
  ]
125
+ initial_briefing = self.briefings.get(role)
126
+ if not initial_briefing:
127
+ role_turns = [t for t in self.turns if t.role == role]
128
+ initial_briefing = role_turns[0].observation if role_turns else ""
129
+ if initial_briefing:
130
+ messages.append({"role": "user", "content": initial_briefing})
131
 
132
  role_turns = [t for t in self.turns if t.role == role]
133
  for turn in role_turns:
134
+ messages.append(
135
+ {
136
+ "role": "assistant",
137
+ "content": turn.assistant_content,
138
+ "tool_calls": [
139
+ {
140
+ "id": turn.tool_call_id,
141
+ "type": "function",
142
+ "function": {
143
+ "name": turn.tool_name,
144
+ "arguments": json.dumps(
145
+ turn.tool_arguments,
146
+ sort_keys=True,
147
+ ),
148
+ },
149
+ }
150
+ ],
151
+ }
152
+ )
153
+ messages.append(
154
+ {
155
+ "role": "tool",
156
+ "content": turn.tool_output,
157
+ "name": turn.tool_name,
158
+ "tool_call_id": turn.tool_call_id,
159
+ }
160
+ )
161
 
162
  return messages
163
 
 
226
  episode_id: str,
227
  snapshot_id: str = "",
228
  tier: int = 1,
229
+ briefings: dict[str, str] | None = None,
230
  ) -> Episode:
231
  """Begin recording a new episode.
232
 
 
248
  episode_id=episode_id,
249
  snapshot_id=snapshot_id,
250
  tier=tier,
251
+ briefings=dict(briefings or {}),
252
  started_at=time.time(),
253
  )
254
  return self._current
 
259
  observation: str,
260
  action: str,
261
  reward: float = 0.0,
262
+ *,
263
+ assistant_content: str = "",
264
+ tool_name: str = "shell_command",
265
+ tool_arguments: dict[str, Any] | None = None,
266
+ tool_output: str | None = None,
267
  ) -> Turn:
268
  """Record a single turn in the current episode.
269
 
 
289
  observation=observation,
290
  action=action,
291
  reward=reward,
292
+ assistant_content=assistant_content,
293
+ tool_name=tool_name,
294
+ tool_arguments=dict(tool_arguments or {}),
295
+ tool_output=tool_output or observation,
296
  )
297
  self._current.turns.append(turn)
298
  return turn
tests/test_agents.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  Covers:
4
  - RangeAgent protocol compliance for all agent types
5
- - ScriptedAgent command replay and fallback
6
  - extract_command parsing of various LLM output formats
7
  - run_episode orchestration with a mocked environment
8
  - evaluate harness with multiple episodes
@@ -19,7 +19,7 @@ from open_range.agents.protocol import (
19
  RangeAgent,
20
  )
21
  from open_range.agents.parsing import extract_command
22
- from open_range.agents.scripted_agent import (
23
  ScriptedAgent,
24
  ScriptedBlueAgent,
25
  ScriptedRedAgent,
@@ -488,7 +488,7 @@ class TestResolveAgents:
488
  from open_range.resolve import resolve_component
489
 
490
  agent = resolve_component(
491
- "open_range.agents.scripted_agent.ScriptedAgent",
492
  {"commands": ["echo test"]},
493
  RangeAgent,
494
  )
 
2
 
3
  Covers:
4
  - RangeAgent protocol compliance for all agent types
5
+ - replay-agent command replay and fallback
6
  - extract_command parsing of various LLM output formats
7
  - run_episode orchestration with a mocked environment
8
  - evaluate harness with multiple episodes
 
19
  RangeAgent,
20
  )
21
  from open_range.agents.parsing import extract_command
22
+ from open_range.agents.replay_agent import (
23
  ScriptedAgent,
24
  ScriptedBlueAgent,
25
  ScriptedRedAgent,
 
488
  from open_range.resolve import resolve_component
489
 
490
  agent = resolve_component(
491
+ "open_range.agents.replay_agent.ScriptedAgent",
492
  {"commands": ["echo test"]},
493
  RangeAgent,
494
  )
tests/test_demo.py CHANGED
@@ -1,4 +1,4 @@
1
- """Tests for the end-to-end scripted demo."""
2
 
3
  import json
4
  from pathlib import Path
 
1
+ """Tests for the end-to-end replay demo."""
2
 
3
  import json
4
  from pathlib import Path
tests/test_npc_reward_coupling.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for NPC ↔ reward system coupling.
2
+
3
+ Validates that NPC log entries contain the fields the reward system expects:
4
+ - ``label: "benign"`` on routine NPC actions (for FP penalty)
5
+ - ``source`` on all NPC log entries (for FP detection)
6
+ - ``type: "social_engineering"`` on reactive actions (for Red/Blue SE rewards)
7
+ - ``result: "success"/"blocked"`` on reactive actions
8
+
9
+ Also tests credential extraction from snapshot topology.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import pytest
15
+
16
+ from open_range.builder.npc.actions import (
17
+ NPCActionExecutor,
18
+ _extract_db_credentials,
19
+ _extract_db_tables,
20
+ _extract_shares,
21
+ _extract_ssh_credentials,
22
+ _extract_users,
23
+ _extract_web_pages,
24
+ _log,
25
+ _se_log,
26
+ )
27
+ from open_range.builder.npc.channels import ChatChannel, DocumentChannel, VoiceChannel
28
+ from open_range.builder.npc.chat_traffic import generate_chat_traffic
29
+ from open_range.protocols import NPCPersona, SnapshotSpec
30
+
31
+
32
+ # ===================================================================
33
+ # Fixtures
34
+ # ===================================================================
35
+
36
+
37
+ @pytest.fixture
38
+ def persona_low() -> NPCPersona:
39
+ return NPCPersona(
40
+ name="Alice Doe",
41
+ role="Receptionist",
42
+ department="Admin",
43
+ security_awareness=0.2,
44
+ susceptibility={"phishing_email": 0.8, "vishing": 0.7},
45
+ accounts={"email": "adoe@corp.local"},
46
+ )
47
+
48
+
49
+ @pytest.fixture
50
+ def persona_high() -> NPCPersona:
51
+ return NPCPersona(
52
+ name="Bob Smith",
53
+ role="CISO",
54
+ department="Security",
55
+ security_awareness=0.95,
56
+ susceptibility={"phishing_email": 0.1, "vishing": 0.1},
57
+ accounts={"email": "bsmith@corp.local"},
58
+ )
59
+
60
+
61
+ @pytest.fixture
62
+ def snapshot_with_creds() -> SnapshotSpec:
63
+ return SnapshotSpec(
64
+ topology={
65
+ "domain": "example.local",
66
+ "users": [
67
+ {"username": "dbadmin", "password": "S3cur3DB!", "hosts": ["db"]},
68
+ {"username": "sysop", "password": "R00tPw!", "hosts": ["web", "files"], "role": "admin"},
69
+ {"username": "appuser", "password": "AppPw123", "hosts": ["web"]},
70
+ ],
71
+ },
72
+ files={
73
+ "web:/var/www/html/index.php": "<?php echo 'hi'; ?>",
74
+ "web:/var/www/html/login.php": "<?php // login ?>",
75
+ "files:/srv/shares/finance/report.xlsx": "data",
76
+ "files:/srv/shares/hr/employees.csv": "data",
77
+ "db:sql": "CREATE TABLE app_db.users (id INT); INSERT INTO app_db.orders VALUES (1);",
78
+ },
79
+ )
80
+
81
+
82
+ @pytest.fixture
83
+ def snapshot_no_creds() -> SnapshotSpec:
84
+ return SnapshotSpec(topology={}, files={})
85
+
86
+
87
+ # ===================================================================
88
+ # Routine action log labels
89
+ # ===================================================================
90
+
91
+
92
+ class TestRoutineLogLabels:
93
+ """Routine NPC actions must have label='benign' and a source field."""
94
+
95
+ def test_log_has_benign_label(self, persona_low):
96
+ entry = _log(persona_low, "browse", "Browsed /index.php", "web:/index.php")
97
+ assert entry["label"] == "benign"
98
+
99
+ def test_log_has_source(self, persona_low):
100
+ entry = _log(persona_low, "browse", "Browsed /index.php", "web:/index.php")
101
+ assert entry["source"] == "web:/index.php"
102
+
103
+ def test_log_has_type_prefix(self, persona_low):
104
+ entry = _log(persona_low, "query_db", "Queried users", "db:query_log")
105
+ assert entry["type"] == "npc_query_db"
106
+
107
+ def test_log_has_persona(self, persona_low):
108
+ entry = _log(persona_low, "idle", "Reading", "none")
109
+ assert entry["persona"] == "Alice Doe"
110
+ assert entry["department"] == "Admin"
111
+
112
+
113
+ # ===================================================================
114
+ # Reactive (social engineering) log labels
115
+ # ===================================================================
116
+
117
+
118
+ class TestSELogLabels:
119
+ """Reactive NPC actions must have type='social_engineering' and result."""
120
+
121
+ def test_se_log_type(self, persona_low):
122
+ entry = _se_log(persona_low, "click_link", "Clicked link", "web:access_log", result="success")
123
+ assert entry["type"] == "social_engineering"
124
+
125
+ def test_se_log_result_success(self, persona_low):
126
+ entry = _se_log(persona_low, "click_link", "Clicked", "web:access_log", result="success")
127
+ assert entry["result"] == "success"
128
+
129
+ def test_se_log_result_blocked(self, persona_high):
130
+ entry = _se_log(persona_high, "report_to_IT", "Reported", "siem:alert", result="blocked")
131
+ assert entry["result"] == "blocked"
132
+
133
+ def test_se_log_label_reactive(self, persona_low):
134
+ entry = _se_log(persona_low, "share_credentials", "Leaked", "web+siem", result="success")
135
+ assert entry["label"] == "reactive"
136
+
137
+ def test_se_log_has_persona(self, persona_low):
138
+ entry = _se_log(persona_low, "ignore", "Ignored", "none", result="blocked")
139
+ assert entry["persona"] == "Alice Doe"
140
+
141
+
142
+ # ===================================================================
143
+ # Channel log labels
144
+ # ===================================================================
145
+
146
+
147
+ class TestChannelLogLabels:
148
+ """Channel log entries must have label='benign' and source."""
149
+
150
+ def test_chat_channel_log_has_label(self):
151
+ ch = ChatChannel()
152
+ ch.send_message("Alice", "Bob", "Hello!")
153
+ logs = ch.get_channel_log()
154
+ assert len(logs) == 1
155
+ assert logs[0]["label"] == "benign"
156
+ assert "source" in logs[0]
157
+
158
+ def test_voice_channel_log_has_label(self, persona_low):
159
+ ch = VoiceChannel()
160
+ call = ch.initiate_call("Attacker", "Alice", "IT support here")
161
+ ch.respond(persona_low, call)
162
+ logs = ch.get_call_log()
163
+ assert len(logs) == 1
164
+ assert logs[0]["label"] == "benign"
165
+ assert logs[0]["source"] == "voice:phone"
166
+
167
+ def test_document_channel_log_has_label(self, persona_low):
168
+ ch = DocumentChannel()
169
+ doc = ch.share_document("Attacker", "Alice", "report.pdf", "Quarterly report")
170
+ ch.inspect_document(persona_low, doc)
171
+ logs = ch.get_document_log()
172
+ assert len(logs) == 1
173
+ assert logs[0]["label"] == "benign"
174
+ assert "source" in logs[0]
175
+
176
+
177
+ class TestChatTrafficLabels:
178
+ """Chat traffic generation should produce labeled log entries."""
179
+
180
+ def test_generated_chat_has_labels(self, persona_low, persona_high):
181
+ ch = ChatChannel()
182
+ generate_chat_traffic(
183
+ personas=[persona_low, persona_high],
184
+ channel=ch,
185
+ num_messages=5,
186
+ seed=42,
187
+ )
188
+ logs = ch.get_channel_log()
189
+ assert len(logs) == 5
190
+ for entry in logs:
191
+ assert entry["label"] == "benign"
192
+ assert "source" in entry
193
+
194
+
195
+ # ===================================================================
196
+ # Credential extraction from snapshot topology
197
+ # ===================================================================
198
+
199
+
200
+ class TestCredentialExtraction:
201
+ """Credentials should be pulled from snapshot topology, not hardcoded."""
202
+
203
+ def test_db_creds_from_topology(self, snapshot_with_creds):
204
+ user, pwd = _extract_db_credentials(snapshot_with_creds)
205
+ assert user == "dbadmin"
206
+ assert pwd == "S3cur3DB!"
207
+
208
+ def test_db_creds_fallback(self, snapshot_no_creds):
209
+ user, pwd = _extract_db_credentials(snapshot_no_creds)
210
+ assert user == "app_user"
211
+ assert pwd == "AppUs3r!2024"
212
+
213
+ def test_ssh_creds_from_topology(self, snapshot_with_creds):
214
+ user, pwd = _extract_ssh_credentials(snapshot_with_creds)
215
+ assert user == "sysop"
216
+ assert pwd == "R00tPw!"
217
+
218
+ def test_ssh_creds_fallback(self, snapshot_no_creds):
219
+ user, pwd = _extract_ssh_credentials(snapshot_no_creds)
220
+ assert user == "admin"
221
+ assert pwd == "Adm1n!2024"
222
+
223
+
224
+ # ===================================================================
225
+ # Snapshot introspection
226
+ # ===================================================================
227
+
228
+
229
+ class TestSnapshotIntrospection:
230
+ """Verify snapshot-derived targets are generalizable."""
231
+
232
+ def test_extract_web_pages(self, snapshot_with_creds):
233
+ pages = _extract_web_pages(snapshot_with_creds)
234
+ assert "/index.php" in pages
235
+ assert "/login.php" in pages
236
+
237
+ def test_extract_shares(self, snapshot_with_creds):
238
+ shares = _extract_shares(snapshot_with_creds)
239
+ assert "finance" in shares
240
+ assert "hr" in shares
241
+
242
+ def test_extract_db_tables(self, snapshot_with_creds):
243
+ tables = _extract_db_tables(snapshot_with_creds)
244
+ assert "app_db.orders" in tables or "app_db.users" in tables
245
+
246
+ def test_extract_users(self, snapshot_with_creds):
247
+ users = _extract_users(snapshot_with_creds)
248
+ assert "dbadmin" in users
249
+ assert "sysop" in users
250
+
251
+ def test_empty_snapshot_pages(self, snapshot_no_creds):
252
+ pages = _extract_web_pages(snapshot_no_creds)
253
+ assert pages == ["/"]
254
+
255
+ def test_empty_snapshot_shares(self, snapshot_no_creds):
256
+ shares = _extract_shares(snapshot_no_creds)
257
+ assert shares == ["general"]
258
+
259
+
260
+ # ===================================================================
261
+ # Reward coupling integration
262
+ # ===================================================================
263
+
264
+
265
+ class TestRewardCoupling:
266
+ """End-to-end: NPC logs feed into reward computation correctly."""
267
+
268
+ def test_red_social_reward_recognizes_se_logs(self):
269
+ """CompositeRedReward.compute() should find social_engineering entries."""
270
+ from open_range.server.rewards import CompositeRedReward
271
+ from open_range.server.models import RangeAction, RangeObservation, RangeState
272
+
273
+ reward = CompositeRedReward()
274
+ action = RangeAction(command="nmap -sV web", mode="red")
275
+ obs = RangeObservation(stdout="")
276
+ state = RangeState(step_count=1, tier=1)
277
+ snapshot = SnapshotSpec()
278
+
279
+ # NPC log with a successful social engineering event
280
+ ctx = {
281
+ "red_history": [],
282
+ "blue_history": [],
283
+ "npc_traffic_log": [
284
+ {
285
+ "type": "social_engineering",
286
+ "label": "reactive",
287
+ "persona": "Alice",
288
+ "action": "click_link",
289
+ "result": "success",
290
+ "source": "web:access_log",
291
+ "timestamp": 1.0,
292
+ },
293
+ ],
294
+ }
295
+
296
+ score = reward.compute(action, obs, state, snapshot, ctx)
297
+ # social weight is 0.05, score is 1.0, tier 1 multiplier is 1.0
298
+ # So social contribution = 0.05 * 1.0 = 0.05
299
+ # Total should include social component
300
+ assert score > 0 # At minimum efficiency + social contribute
301
+
302
+ def test_blue_fp_penalty_uses_benign_label(self):
303
+ """CompositeBlueReward should penalize findings that match benign NPC sources."""
304
+ from open_range.server.rewards import CompositeBlueReward
305
+ from open_range.server.models import RangeAction, RangeObservation, RangeState
306
+
307
+ reward = CompositeBlueReward()
308
+ action = RangeAction(command="grep suspicious /var/log/siem/all.log", mode="blue")
309
+ obs = RangeObservation(stdout="")
310
+ state = RangeState(step_count=1, tier=1)
311
+ snapshot = SnapshotSpec()
312
+
313
+ # Blue submits a finding that matches a benign NPC source
314
+ ctx = {
315
+ "red_history": [],
316
+ "blue_history": [
317
+ {"type": "finding", "content": "Suspicious activity from chat:general"},
318
+ ],
319
+ "npc_traffic_log": [
320
+ {
321
+ "type": "chat",
322
+ "label": "benign",
323
+ "source": "chat:general",
324
+ "persona": "Alice",
325
+ "timestamp": 1.0,
326
+ },
327
+ ],
328
+ }
329
+
330
+ score = reward.compute(action, obs, state, snapshot, ctx)
331
+ # Should have FP penalty (-0.2 per false positive)
332
+ assert score < 0
333
+
334
+ def test_blue_phishing_detection_reward(self):
335
+ """Blue gets phishing reward when SE events exist and Blue detects them."""
336
+ from open_range.server.rewards import CompositeBlueReward
337
+ from open_range.server.models import RangeAction, RangeObservation, RangeState
338
+
339
+ reward = CompositeBlueReward()
340
+ action = RangeAction(command="grep phish /var/log/siem/all.log", mode="blue")
341
+ obs = RangeObservation(stdout="")
342
+ state = RangeState(step_count=1, tier=1)
343
+ snapshot = SnapshotSpec()
344
+
345
+ ctx = {
346
+ "red_history": [],
347
+ "blue_history": [
348
+ {"type": "finding", "content": "Detected phishing email to Alice"},
349
+ ],
350
+ "npc_traffic_log": [
351
+ {
352
+ "type": "social_engineering",
353
+ "label": "reactive",
354
+ "persona": "Alice",
355
+ "action": "click_link",
356
+ "result": "success",
357
+ "source": "web:access_log",
358
+ "timestamp": 1.0,
359
+ },
360
+ ],
361
+ }
362
+
363
+ score = reward.compute(action, obs, state, snapshot, ctx)
364
+ # phishing weight is 0.05, Blue detected 1/1 SE events
365
+ assert score > 0
tests/test_parse_llm_response.py CHANGED
@@ -59,6 +59,8 @@ class TestRealLLMOutput:
59
  @pytest.fixture
60
  def llm_json(self):
61
  path = ROOT / "snapshots" / "llm_tier1_test.json"
 
 
62
  return path.read_text()
63
 
64
  def test_parses_to_snapshot_spec(self, llm_json):
 
59
  @pytest.fixture
60
  def llm_json(self):
61
  path = ROOT / "snapshots" / "llm_tier1_test.json"
62
+ if not path.exists():
63
+ pytest.skip("llm_tier1_test.json fixture not present")
64
  return path.read_text()
65
 
66
  def test_parses_to_snapshot_spec(self, llm_json):
tests/test_renderer_integration.py CHANGED
@@ -23,6 +23,8 @@ SNAPSHOT_PATH = ROOT / "snapshots" / "llm_tier1_test.json"
23
  @pytest.fixture
24
  def llm_output() -> dict:
25
  """Load the real LLM output JSON."""
 
 
26
  return json.loads(SNAPSHOT_PATH.read_text())
27
 
28
 
@@ -208,8 +210,8 @@ class TestDockerCompose:
208
  def test_db_has_mysql_env_vars(self, rendered_dir):
209
  compose = (rendered_dir / "docker-compose.yml").read_text()
210
  assert "MYSQL_ROOT_PASSWORD" in compose
211
- assert "MYSQL_DATABASE=referral_db" in compose
212
- assert "MYSQL_USER=app_user" in compose
213
 
214
 
215
  # ---------------------------------------------------------------------------
 
23
  @pytest.fixture
24
  def llm_output() -> dict:
25
  """Load the real LLM output JSON."""
26
+ if not SNAPSHOT_PATH.exists():
27
+ pytest.skip("llm_tier1_test.json fixture not present")
28
  return json.loads(SNAPSHOT_PATH.read_text())
29
 
30
 
 
210
  def test_db_has_mysql_env_vars(self, rendered_dir):
211
  compose = (rendered_dir / "docker-compose.yml").read_text()
212
  assert "MYSQL_ROOT_PASSWORD" in compose
213
+ assert "MYSQL_DATABASE=" in compose
214
+ assert "MYSQL_USER=" in compose
215
 
216
 
217
  # ---------------------------------------------------------------------------
tests/test_solvers.py CHANGED
@@ -12,7 +12,7 @@ from __future__ import annotations
12
  import pytest
13
 
14
  from open_range.agents.protocol import RangeAgent
15
- from open_range.agents.scripted_agent import ScriptedAgent
16
  from open_range.agents.solvers import (
17
  BLUE_DEFENSE_COMMANDS,
18
  TIER1_RED_COMMANDS,
 
12
  import pytest
13
 
14
  from open_range.agents.protocol import RangeAgent
15
+ from open_range.agents.replay_agent import ScriptedAgent
16
  from open_range.agents.solvers import (
17
  BLUE_DEFENSE_COMMANDS,
18
  TIER1_RED_COMMANDS,
tests/test_synthetic.py CHANGED
@@ -11,9 +11,10 @@ import pytest
11
  from click.testing import CliRunner
12
 
13
  from open_range.agents.llm_agent import LLMRangeAgent
14
- from open_range.agents.scripted_agent import ScriptedAgent, ScriptedBlueAgent, ScriptedRedAgent
15
  from open_range.cli import cli
16
  from open_range.server.models import RangeAction
 
17
  from open_range.training.synthetic import (
18
  SyntheticRangeEnvironment,
19
  SyntheticTraceGenerator,
@@ -101,6 +102,11 @@ class TestSyntheticTraceGenerator:
101
  records = [json.loads(line) for line in output_path.read_text().splitlines()]
102
  assert {record["role"] for record in records} == {"red", "blue"}
103
  assert all(record["messages"][0]["role"] == "system" for record in records)
 
 
 
 
 
104
 
105
  def test_build_teacher_agents_falls_back_to_scripted_when_no_model(self):
106
  red, blue = build_teacher_agents(teacher_model=None, roles=("red", "blue"))
@@ -154,6 +160,42 @@ class TestLiteLLMSupport:
154
  assert captured["drop_params"] is True
155
 
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  class TestSyntheticCLI:
158
  def test_cli_generates_jsonl_from_snapshot(self, tmp_path, sample_snapshot_spec):
159
  runner = CliRunner()
@@ -187,6 +229,130 @@ class TestSyntheticCLI:
187
  records = [json.loads(line) for line in output_path.read_text().splitlines()]
188
  assert len(records) == 1
189
  assert records[0]["role"] == "red"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
 
192
  @pytest.mark.live_model
 
11
  from click.testing import CliRunner
12
 
13
  from open_range.agents.llm_agent import LLMRangeAgent
14
+ from open_range.agents.replay_agent import ScriptedAgent, ScriptedBlueAgent, ScriptedRedAgent
15
  from open_range.cli import cli
16
  from open_range.server.models import RangeAction
17
+ from open_range.training.dataset import append_tool_context, load_jsonl_records, load_tool_context
18
  from open_range.training.synthetic import (
19
  SyntheticRangeEnvironment,
20
  SyntheticTraceGenerator,
 
102
  records = [json.loads(line) for line in output_path.read_text().splitlines()]
103
  assert {record["role"] for record in records} == {"red", "blue"}
104
  assert all(record["messages"][0]["role"] == "system" for record in records)
105
+ assert all(record["messages"][1]["role"] == "user" for record in records)
106
+ assert any(message["role"] == "tool" for message in records[0]["messages"])
107
+ assert all("metadata" in record for record in records)
108
+ assert all("ground_truth_flag" in record for record in records)
109
+ assert all("optimal_steps" in record for record in records)
110
 
111
  def test_build_teacher_agents_falls_back_to_scripted_when_no_model(self):
112
  red, blue = build_teacher_agents(teacher_model=None, roles=("red", "blue"))
 
160
  assert captured["drop_params"] is True
161
 
162
 
163
+ class TestDatasetHelpers:
164
+ def test_load_bootstrap_records_and_append_tool_context(self, tmp_path):
165
+ bootstrap_path = tmp_path / "bootstrap.jsonl"
166
+ bootstrap_path.write_text(
167
+ json.dumps(
168
+ {
169
+ "messages": [
170
+ {"role": "system", "content": "Seed system prompt"},
171
+ {"role": "user", "content": "obs"},
172
+ {"role": "assistant", "content": "cmd"},
173
+ ],
174
+ "metadata": {"source": "bootstrap"},
175
+ }
176
+ )
177
+ + "\n"
178
+ )
179
+ tool_path = tmp_path / "tools.json"
180
+ tool_path.write_text(
181
+ json.dumps(
182
+ [
183
+ {"name": "shell_command", "description": "Run a shell command"},
184
+ {"name": "read_file", "description": "Read file contents"},
185
+ ]
186
+ )
187
+ )
188
+
189
+ records = load_jsonl_records([bootstrap_path])
190
+ tool_context = load_tool_context([tool_path])
191
+ enriched = append_tool_context(records, tool_context)
192
+
193
+ assert len(records) == 1
194
+ assert "shell_command" in tool_context
195
+ assert "Available tools" in enriched[0]["messages"][0]["content"]
196
+ assert "read_file" in enriched[0]["messages"][0]["content"]
197
+
198
+
199
  class TestSyntheticCLI:
200
  def test_cli_generates_jsonl_from_snapshot(self, tmp_path, sample_snapshot_spec):
201
  runner = CliRunner()
 
229
  records = [json.loads(line) for line in output_path.read_text().splitlines()]
230
  assert len(records) == 1
231
  assert records[0]["role"] == "red"
232
+ assert any(message["role"] == "tool" for message in records[0]["messages"])
233
+ assert any(message.get("tool_calls") for message in records[0]["messages"] if message["role"] == "assistant")
234
+
235
+ def test_cli_merges_bootstrap_traces_and_tool_info(self, tmp_path, sample_snapshot_spec):
236
+ runner = CliRunner()
237
+ snapshot_path = tmp_path / "spec.json"
238
+ snapshot_path.write_text(json.dumps(sample_snapshot_spec.model_dump(mode="python")))
239
+ bootstrap_path = tmp_path / "bootstrap.jsonl"
240
+ bootstrap_path.write_text(
241
+ json.dumps(
242
+ {
243
+ "messages": [
244
+ {"role": "system", "content": "Bootstrap system"},
245
+ {"role": "user", "content": "bootstrap obs"},
246
+ {"role": "assistant", "content": "bootstrap cmd"},
247
+ ],
248
+ "metadata": {"source": "bootstrap"},
249
+ }
250
+ )
251
+ + "\n"
252
+ )
253
+ tool_path = tmp_path / "tools.md"
254
+ tool_path.write_text("- shell_command: Run shell commands\n- read_file: Read files\n")
255
+ output_path = tmp_path / "merged.jsonl"
256
+
257
+ result = runner.invoke(
258
+ cli,
259
+ [
260
+ "synthetic-data",
261
+ "--snapshot",
262
+ str(snapshot_path),
263
+ "--output",
264
+ str(output_path),
265
+ "--num-traces",
266
+ "1",
267
+ "--max-steps",
268
+ "1",
269
+ "--roles",
270
+ "red",
271
+ "--reward-threshold",
272
+ "-1",
273
+ "--bootstrap-traces",
274
+ str(bootstrap_path),
275
+ "--tool-info",
276
+ str(tool_path),
277
+ "--static-flags",
278
+ ],
279
+ )
280
+
281
+ assert result.exit_code == 0, result.output
282
+ records = [json.loads(line) for line in output_path.read_text().splitlines()]
283
+ assert len(records) == 2
284
+ assert records[0]["metadata"]["source"] == "bootstrap"
285
+ assert "Available tools" in records[1]["messages"][0]["content"]
286
+ assert "shell_command" in records[1]["messages"][0]["content"]
287
+
288
+ def test_cli_can_emit_generated_only_while_using_bootstrap_examples(self, tmp_path, sample_snapshot_spec):
289
+ runner = CliRunner()
290
+ snapshot_path = tmp_path / "spec.json"
291
+ snapshot_path.write_text(json.dumps(sample_snapshot_spec.model_dump(mode="python")))
292
+ bootstrap_path = tmp_path / "bootstrap.jsonl"
293
+ bootstrap_path.write_text(
294
+ json.dumps(
295
+ {
296
+ "messages": [
297
+ {"role": "system", "content": "Bootstrap system"},
298
+ {"role": "user", "content": "bootstrap prompt"},
299
+ {
300
+ "role": "assistant",
301
+ "content": "<think>Seed</think>",
302
+ "tool_calls": [
303
+ {
304
+ "id": "call_seed",
305
+ "type": "function",
306
+ "function": {
307
+ "name": "shell_command",
308
+ "arguments": "{\"command\": \"whoami\"}",
309
+ },
310
+ }
311
+ ],
312
+ },
313
+ {
314
+ "role": "tool",
315
+ "name": "shell_command",
316
+ "tool_call_id": "call_seed",
317
+ "content": "[0.2s] kali",
318
+ },
319
+ ],
320
+ "metadata": {"source": "bootstrap", "success": True, "total_turns": 4},
321
+ }
322
+ )
323
+ + "\n"
324
+ )
325
+ output_path = tmp_path / "generated_only.jsonl"
326
+
327
+ result = runner.invoke(
328
+ cli,
329
+ [
330
+ "synthetic-data",
331
+ "--snapshot",
332
+ str(snapshot_path),
333
+ "--output",
334
+ str(output_path),
335
+ "--num-traces",
336
+ "1",
337
+ "--max-steps",
338
+ "1",
339
+ "--roles",
340
+ "red",
341
+ "--reward-threshold",
342
+ "-1",
343
+ "--bootstrap-traces",
344
+ str(bootstrap_path),
345
+ "--bootstrap-examples",
346
+ "1",
347
+ "--generated-only",
348
+ "--static-flags",
349
+ ],
350
+ )
351
+
352
+ assert result.exit_code == 0, result.output
353
+ records = [json.loads(line) for line in output_path.read_text().splitlines()]
354
+ assert len(records) == 1
355
+ assert records[0]["metadata"]["source"] == "open_range.synthetic"
356
 
357
 
358
  @pytest.mark.live_model
tests/test_trajectory.py CHANGED
@@ -41,12 +41,48 @@ class TestTurn:
41
  class TestEpisode:
42
  def _make_episode(self) -> Episode:
43
  ep = Episode(episode_id="ep-1", snapshot_id="snap-1", tier=1)
 
 
 
 
44
  ep.turns = [
45
- Turn(role="red", observation="briefing", action="nmap -sV web", reward=0.1),
46
- Turn(role="blue", observation="alert: nmap", action="submit_finding nmap scan", reward=0.2),
47
- Turn(role="red", observation="ports found", action="curl http://web/search?q=test", reward=0.15),
48
- Turn(role="blue", observation="sql log", action="grep UNION /var/log/siem/web.log", reward=0.05),
49
- Turn(role="red", observation="sqli output", action="submit_flag FLAG{sqli_123}", reward=0.5),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ]
51
  ep.outcome = "flag_captured"
52
  return ep
@@ -74,22 +110,26 @@ class TestEpisode:
74
  def test_to_chat_messages_red(self):
75
  ep = self._make_episode()
76
  msgs = ep.to_chat_messages("red")
77
- # system + 3 * (user + assistant) = 7 messages
78
- assert len(msgs) == 7
79
  assert msgs[0]["role"] == "system"
80
  assert msgs[0]["content"] == RED_SYSTEM_PROMPT
81
  assert msgs[1]["role"] == "user"
82
- assert msgs[1]["content"] == "briefing"
83
  assert msgs[2]["role"] == "assistant"
84
- assert msgs[2]["content"] == "nmap -sV web"
 
 
85
 
86
  def test_to_chat_messages_blue(self):
87
  ep = self._make_episode()
88
  msgs = ep.to_chat_messages("blue")
89
- # system + 2 * (user + assistant) = 5 messages
90
- assert len(msgs) == 5
91
  assert msgs[0]["role"] == "system"
92
  assert msgs[0]["content"] == BLUE_SYSTEM_PROMPT
 
 
93
 
94
  def test_to_jsonl_record(self):
95
  ep = self._make_episode()
@@ -113,10 +153,16 @@ class TestEpisode:
113
  class TestTrajectoryLogger:
114
  def test_start_episode(self):
115
  logger = TrajectoryLogger()
116
- ep = logger.start_episode("ep-1", snapshot_id="snap-1", tier=2)
 
 
 
 
 
117
  assert ep.episode_id == "ep-1"
118
  assert ep.snapshot_id == "snap-1"
119
  assert ep.tier == 2
 
120
  assert logger.current_episode is ep
121
 
122
  def test_log_turn(self):
@@ -238,9 +284,13 @@ class TestExportJsonl:
238
  # Messages must follow chat format
239
  msgs = record["messages"]
240
  assert msgs[0]["role"] == "system"
 
241
  for msg in msgs:
242
  assert "role" in msg
243
  assert "content" in msg
 
 
 
244
 
245
  def test_export_creates_parent_dirs(self, tmp_path: Path):
246
  logger = self._build_logger_with_episodes()
 
41
  class TestEpisode:
42
  def _make_episode(self) -> Episode:
43
  ep = Episode(episode_id="ep-1", snapshot_id="snap-1", tier=1)
44
+ ep.briefings = {
45
+ "red": "Red briefing",
46
+ "blue": "Blue briefing",
47
+ }
48
  ep.turns = [
49
+ Turn(
50
+ role="red",
51
+ observation="[0.2s] 80/tcp open http",
52
+ action="nmap -sV web",
53
+ reward=0.1,
54
+ assistant_content="<think>\nRecon first.\n</think>",
55
+ ),
56
+ Turn(
57
+ role="blue",
58
+ observation="[0.3s] suspicious nmap",
59
+ action="submit_finding nmap scan",
60
+ reward=0.2,
61
+ assistant_content="<think>\nThis is actionable.\n</think>",
62
+ ),
63
+ Turn(
64
+ role="red",
65
+ observation="[0.4s] products",
66
+ action="curl http://web/search?q=test",
67
+ reward=0.15,
68
+ assistant_content="<think>\nInspect the search route.\n</think>",
69
+ ),
70
+ Turn(
71
+ role="blue",
72
+ observation="[0.5s] SQLi in web log",
73
+ action="grep UNION /var/log/siem/web.log",
74
+ reward=0.05,
75
+ assistant_content="<think>\nI need evidence.\n</think>",
76
+ ),
77
+ Turn(
78
+ role="red",
79
+ observation="[0.6s] correct",
80
+ action="submit_flag FLAG{sqli_123}",
81
+ reward=0.5,
82
+ assistant_content="<think>\nThis token is worth validating.\n</think>",
83
+ tool_name="flag_found",
84
+ tool_arguments={"flag": "FLAG{sqli_123}"},
85
+ ),
86
  ]
87
  ep.outcome = "flag_captured"
88
  return ep
 
110
  def test_to_chat_messages_red(self):
111
  ep = self._make_episode()
112
  msgs = ep.to_chat_messages("red")
113
+ # system + briefing + 3 * (assistant + tool) = 8 messages
114
+ assert len(msgs) == 8
115
  assert msgs[0]["role"] == "system"
116
  assert msgs[0]["content"] == RED_SYSTEM_PROMPT
117
  assert msgs[1]["role"] == "user"
118
+ assert msgs[1]["content"] == "Red briefing"
119
  assert msgs[2]["role"] == "assistant"
120
+ assert "tool_calls" in msgs[2]
121
+ assert msgs[2]["tool_calls"][0]["function"]["name"] == "shell_command"
122
+ assert msgs[3]["role"] == "tool"
123
 
124
  def test_to_chat_messages_blue(self):
125
  ep = self._make_episode()
126
  msgs = ep.to_chat_messages("blue")
127
+ # system + briefing + 2 * (assistant + tool) = 6 messages
128
+ assert len(msgs) == 6
129
  assert msgs[0]["role"] == "system"
130
  assert msgs[0]["content"] == BLUE_SYSTEM_PROMPT
131
+ assert msgs[1]["role"] == "user"
132
+ assert msgs[1]["content"] == "Blue briefing"
133
 
134
  def test_to_jsonl_record(self):
135
  ep = self._make_episode()
 
153
  class TestTrajectoryLogger:
154
  def test_start_episode(self):
155
  logger = TrajectoryLogger()
156
+ ep = logger.start_episode(
157
+ "ep-1",
158
+ snapshot_id="snap-1",
159
+ tier=2,
160
+ briefings={"red": "brief"},
161
+ )
162
  assert ep.episode_id == "ep-1"
163
  assert ep.snapshot_id == "snap-1"
164
  assert ep.tier == 2
165
+ assert ep.briefings["red"] == "brief"
166
  assert logger.current_episode is ep
167
 
168
  def test_log_turn(self):
 
284
  # Messages must follow chat format
285
  msgs = record["messages"]
286
  assert msgs[0]["role"] == "system"
287
+ assert msgs[1]["role"] == "user"
288
  for msg in msgs:
289
  assert "role" in msg
290
  assert "content" in msg
291
+ for msg in msgs:
292
+ if msg["role"] == "assistant":
293
+ assert msg["tool_calls"]
294
 
295
  def test_export_creates_parent_dirs(self, tmp_path: Path):
296
  logger = self._build_logger_with_episodes()