Lars Talian committed on
Commit
6e4e622
·
unverified ·
1 Parent(s): 202e768

Fix lint packaging and fail closed runtime paths (#93)

Browse files
src/manifests/schema.py CHANGED
@@ -1,397 +1,45 @@
1
- """Pydantic models for OpenRange manifest validation.
2
-
3
- A manifest declares the *world* of a cyber range: the fictional company, its
4
- people, data, business processes, network topology, and the vulnerability
5
- families the Builder may plant. This rich context lets the Builder LLM
6
- generate scenarios where vulns, NPCs, and data flows make narrative sense --
7
- not just "a web server with SQLi" but "a patient-referral portal where the
8
- search endpoint trusts user input because the original dev left six months ago
9
- and nobody reviewed it since."
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- from pathlib import Path
15
- from typing import Any, Literal
16
-
17
- import yaml
18
- from pydantic import BaseModel, Field, model_validator
19
-
20
-
21
- # ---------------------------------------------------------------------------
22
- # Company context -- gives the Builder a story to build from
23
- # ---------------------------------------------------------------------------
24
-
25
- class Company(BaseModel):
26
- """The fictional company that owns this range."""
27
-
28
- name: str = Field(..., description="Company name, e.g. 'Meridian Health Partners'")
29
- domain: str = Field(..., description="Internal FQDN domain, e.g. 'meridianhealth.local'")
30
- industry: str = Field(..., description="Industry vertical, e.g. 'healthcare'")
31
- description: str = Field(
32
- ..., description="What the company does -- 2-3 sentences the Builder uses for narrative"
33
- )
34
-
35
-
36
- class Department(BaseModel):
37
- """An organizational unit with its own access profile."""
38
-
39
- name: str
40
- description: str = ""
41
- hosts_accessed: list[str] = Field(
42
- default_factory=list,
43
- description="Hostnames staff in this dept routinely access",
44
- )
45
-
46
-
47
- # ---------------------------------------------------------------------------
48
- # People: users and NPC personas
49
- # ---------------------------------------------------------------------------
50
-
51
- class User(BaseModel):
52
- """A user account that exists in the range (LDAP/local)."""
53
-
54
- username: str
55
- full_name: str = ""
56
- department: str = ""
57
- role: str = ""
58
- email: str = ""
59
- hosts: list[str] = Field(
60
- default_factory=list,
61
- description="Hosts where this user has an account",
62
- )
63
-
64
-
65
- class NPCProfile(BaseModel, extra="allow"):
66
- """An NPC persona the Builder should generate traffic and behavior for."""
67
-
68
- username: str = Field(..., description="Must reference a User.username")
69
- security_awareness: float = Field(
70
- default=0.5, ge=0.0, le=1.0,
71
- description="0=clueless, 1=CISO-level. Determines susceptibility to social engineering",
72
- )
73
- daily_activities: list[str] = Field(
74
- default_factory=list,
75
- description="What this person does all day -- generates realistic traffic patterns",
76
- )
77
- susceptibility: dict[str, float] = Field(
78
- default_factory=dict,
79
- description="Attack-type -> probability of falling for it, e.g. {'phishing_email': 0.7}",
80
- )
81
-
82
-
83
- # ---------------------------------------------------------------------------
84
- # Data and business processes -- tells the Builder what to protect
85
- # ---------------------------------------------------------------------------
86
-
87
- class DataAsset(BaseModel):
88
- """A piece of sensitive data that exists somewhere in the range."""
89
-
90
- name: str = Field(..., description="Human name, e.g. 'Patient referral records'")
91
- classification: Literal["public", "internal", "confidential", "restricted"] = "internal"
92
- host: str = Field(..., description="Host where this data lives")
93
- location: str = Field(
94
- default="", description="Path or service, e.g. '/srv/shares/hr' or 'mysql:app_db.patients'"
95
- )
96
- description: str = ""
97
-
98
-
99
- class BusinessProcess(BaseModel):
100
- """A cross-service data flow the Builder should keep realistic."""
101
-
102
- name: str
103
- description: str = ""
104
- data_flow: list[str] = Field(
105
- default_factory=list,
106
- description="Ordered list of host:service hops, e.g. ['web:nginx', 'db:mysql', 'siem:rsyslog']",
107
- )
108
-
109
-
110
- # ---------------------------------------------------------------------------
111
- # Infrastructure realism -- software, config, and operational context
112
- # ---------------------------------------------------------------------------
113
-
114
- class TechStack(BaseModel, extra="allow"):
115
- """Specific software versions and known technical debt.
116
-
117
- Accepts both flat string fields and nested dicts for flexibility.
118
- """
119
-
120
- known_debt: list[str] = Field(default_factory=list)
121
-
122
-
123
- class CredentialPolicy(BaseModel, extra="allow"):
124
- """How credentials work (and fail) in this organization.
125
-
126
- Accepts flexible formats: flat strings or structured dicts.
127
- """
128
-
129
- enforcement_gaps: list[Any] = Field(default_factory=list)
130
-
131
-
132
- class MonitoringCoverage(BaseModel, extra="allow"):
133
- """What Blue can actually see — and the blind spots Red can exploit."""
134
-
135
- logged: list[Any] = Field(default_factory=list)
136
- blind_spots: list[str] = Field(default_factory=list)
137
- alert_rules: list[Any] = Field(default_factory=list)
138
- retention_days: int = Field(default=90)
139
-
140
-
141
- class TrustRelationship(BaseModel, extra="allow"):
142
- """Who trusts whom — the social graph Red can exploit for lateral movement.
143
-
144
- Accepts 'from'/'to' (YAML-friendly) or 'source'/'target' field names.
145
- """
146
-
147
- type: str = Field(
148
- ..., description="Relationship type: reports_to, delegates_access, shares_credentials, trusts_email"
149
- )
150
-
151
- # Accept either naming convention
152
- source: str = ""
153
- target: str = ""
154
-
155
- # 'from' and 'to' are Python keywords, handle via model_validator
156
- @model_validator(mode="before")
157
- @classmethod
158
- def _normalize_field_names(cls, data: Any) -> Any:
159
- if isinstance(data, dict):
160
- # Normalize various naming conventions to source/target
161
- for src_key in ("from", "from_user"):
162
- if src_key in data and not data.get("source"):
163
- data["source"] = data.pop(src_key)
164
- for tgt_key in ("to", "to_user"):
165
- if tgt_key in data and not data.get("target"):
166
- data["target"] = data.pop(tgt_key)
167
- # Accept 'description', 'detail', or 'context' as the explanation field
168
- for alt in ("description", "detail"):
169
- if alt in data and "context" not in data:
170
- data["context"] = data.pop(alt)
171
- return data
172
-
173
- context: str = Field(
174
- default="", description="Why this trust exists"
175
- )
176
-
177
-
178
- class OperationalContext(BaseModel, extra="allow"):
179
- """How this company actually operates day-to-day.
180
-
181
- Accepts flexible formats for all fields.
182
- """
183
-
184
- recent_incidents: list[Any] = Field(default_factory=list)
185
- audit_findings: list[Any] = Field(default_factory=list)
186
- maintenance_windows: list[Any] | Any = Field(default_factory=list)
187
- vendor_access: list[Any] = Field(default_factory=list)
188
- recent_changes: list[Any] = Field(default_factory=list)
189
-
190
- @model_validator(mode="before")
191
- @classmethod
192
- def _normalize_fields(cls, data: Any) -> Any:
193
- if isinstance(data, dict):
194
- # Accept 'compliance' as alias for 'compliance_frameworks'
195
- if "compliance" in data and "compliance_frameworks" not in data:
196
- data["compliance_frameworks"] = data.pop("compliance")
197
- # Normalize maintenance_windows dict to list
198
- mw = data.get("maintenance_windows")
199
- if isinstance(mw, dict):
200
- data["maintenance_windows"] = [f"{k}: {v}" for k, v in mw.items()]
201
- elif isinstance(mw, str):
202
- data["maintenance_windows"] = [mw]
203
- # Normalize list-of-dicts to list-of-strings where needed
204
- for field in ("recent_incidents", "vendor_access", "recent_changes"):
205
- items = data.get(field, [])
206
- if items and isinstance(items[0], dict):
207
- data[field] = [
208
- item.get("description", "") or " | ".join(f"{k}: {v}" for k, v in item.items())
209
- for item in items
210
- ]
211
- return data
212
-
213
- compliance_frameworks: list[str] = Field(default_factory=list)
214
-
215
-
216
- # ---------------------------------------------------------------------------
217
- # Topology primitives
218
- # ---------------------------------------------------------------------------
219
-
220
- class ExposurePolicy(BaseModel):
221
- """Per-host exposure configuration."""
222
-
223
- level: Literal["public", "hidden", "authenticated", "misconfigured"] = "public"
224
- auth_required: bool = False
225
- notes: str = ""
226
-
227
-
228
- class Host(BaseModel):
229
- """A single host (container) in the range topology."""
230
-
231
- name: str = Field(..., description="Unique hostname, e.g. 'web', 'db'")
232
- zone: str = Field(..., description="Network zone this host belongs to")
233
- purpose: str = Field(
234
- default="",
235
- description="Why this host exists in the company, e.g. 'Customer-facing referral portal'",
236
- )
237
- hostname: str = Field(
238
- default="",
239
- description="FQDN in the company domain, e.g. 'portal.meridianhealth.local'",
240
- )
241
- services: list[str] = Field(
242
- default_factory=list,
243
- description="Services running on this host, e.g. ['nginx', 'php', 'sshd']",
244
- )
245
- connects_to: list[str] = Field(
246
- default_factory=list,
247
- description="Hostnames this host initiates connections to",
248
- )
249
- os: str = Field(
250
- default="ubuntu:22.04",
251
- description="Base OS image for the container",
252
- )
253
- exposure: ExposurePolicy = Field(default_factory=ExposurePolicy)
254
-
255
-
256
- class Network(BaseModel):
257
- """A named network zone with an optional CIDR."""
258
-
259
- name: str = Field(..., description="Zone name, e.g. 'dmz', 'internal'")
260
- cidr: str | None = Field(
261
- default=None,
262
- description="Subnet CIDR, e.g. '10.0.1.0/24'",
263
- )
264
-
265
-
266
- class FirewallRule(BaseModel):
267
- """A directional firewall rule between two zones."""
268
-
269
- action: Literal["allow", "deny"] = Field(
270
- ..., description="Whether traffic is allowed or denied"
271
- )
272
- from_zone: str = Field(..., description="Source zone")
273
- to_zone: str = Field(..., description="Destination zone")
274
- ports: list[int] = Field(
275
- default_factory=list,
276
- description="TCP ports this rule applies to (empty = all ports)",
277
- )
278
-
279
-
280
- class Topology(BaseModel):
281
- """Full network topology: hosts, networks, and firewall rules."""
282
-
283
- hosts: list[Host] = Field(..., min_length=1)
284
- networks: list[Network] = Field(..., min_length=1)
285
- firewall_rules: list[FirewallRule] = Field(default_factory=list)
286
-
287
- @model_validator(mode="after")
288
- def _hosts_reference_valid_zones(self) -> "Topology":
289
- zone_names = {n.name for n in self.networks}
290
- for host in self.hosts:
291
- if host.zone not in zone_names:
292
- raise ValueError(
293
- f"Host '{host.name}' references zone '{host.zone}' "
294
- f"which is not defined in networks: {sorted(zone_names)}"
295
- )
296
- return self
297
-
298
- @model_validator(mode="after")
299
- def _firewall_rules_reference_valid_zones(self) -> "Topology":
300
- zone_names = {n.name for n in self.networks}
301
- for rule in self.firewall_rules:
302
- for attr in ("from_zone", "to_zone"):
303
- zone = getattr(rule, attr)
304
- if zone not in zone_names:
305
- raise ValueError(
306
- f"Firewall rule references zone '{zone}' "
307
- f"which is not defined in networks: {sorted(zone_names)}"
308
- )
309
- return self
310
-
311
-
312
- # ---------------------------------------------------------------------------
313
- # Difficulty envelope
314
- # ---------------------------------------------------------------------------
315
-
316
- class Difficulty(BaseModel):
317
- """Difficulty constraints the Validator enforces on generated ranges."""
318
-
319
- max_steps: int = Field(
320
- ..., gt=0, description="Maximum golden-path steps allowed"
321
- )
322
- min_vulns: int = Field(
323
- default=1, ge=1, description="Minimum planted vulnerabilities"
324
- )
325
- max_vulns: int = Field(
326
- default=3, ge=1, description="Maximum planted vulnerabilities"
327
- )
328
-
329
- @model_validator(mode="after")
330
- def _min_le_max(self) -> "Difficulty":
331
- if self.min_vulns > self.max_vulns:
332
- raise ValueError(
333
- f"min_vulns ({self.min_vulns}) must be <= max_vulns ({self.max_vulns})"
334
- )
335
- return self
336
-
337
-
338
- # ---------------------------------------------------------------------------
339
- # Top-level manifest
340
- # ---------------------------------------------------------------------------
341
-
342
- class Manifest(BaseModel):
343
- """Top-level range manifest -- the contract between humans and the Builder.
344
-
345
- Required fields define the network and vuln envelope. Optional fields
346
- (company, users, NPCs, data, processes) provide narrative context that
347
- lets the Builder generate realistic, interconnected scenarios.
348
- """
349
-
350
- name: str = Field(..., description="Human-readable range name")
351
- tier: int = Field(..., ge=1, le=5, description="Complexity tier (1-5)")
352
-
353
- # Company context (optional but strongly encouraged)
354
- company: Company | None = None
355
- departments: list[Department] = Field(default_factory=list)
356
- users: list[User] = Field(default_factory=list)
357
- npc_personas: list[NPCProfile] = Field(default_factory=list)
358
- data_inventory: list[DataAsset] = Field(default_factory=list)
359
- business_processes: list[BusinessProcess] = Field(default_factory=list)
360
-
361
- # Infrastructure realism (optional — enriches Builder context)
362
- tech_stack: TechStack | Any | None = None
363
- credential_policy: CredentialPolicy | Any | None = None
364
- monitoring_coverage: MonitoringCoverage | Any | None = None
365
- trust_relationships: list[TrustRelationship] = Field(default_factory=list)
366
- operational_context: OperationalContext | Any | None = None
367
-
368
- # Core topology and vuln envelope
369
- topology: Topology
370
- bug_families: list[str] = Field(
371
- ...,
372
- min_length=1,
373
- description="Vulnerability classes the Builder may plant (LLM generates details from these type names)",
374
- )
375
- task_families: list[str] = Field(
376
- default=["exploit", "investigate", "patch", "report"],
377
- description="Task types agents may be asked to perform",
378
- )
379
- difficulty: Difficulty
380
-
381
-
382
- # ---------------------------------------------------------------------------
383
- # Loader
384
- # ---------------------------------------------------------------------------
385
-
386
- def load_manifest(path: str | Path) -> Manifest:
387
- """Load a YAML manifest file and return a validated ``Manifest``.
388
-
389
- Raises ``FileNotFoundError`` if the file does not exist.
390
- Raises ``pydantic.ValidationError`` if the content is invalid.
391
- """
392
- path = Path(path)
393
- if not path.exists():
394
- raise FileNotFoundError(f"Manifest not found: {path}")
395
- with open(path) as fh:
396
- raw = yaml.safe_load(fh)
397
- return Manifest(**raw)
 
1
+ """Backward-compatible shim for the packaged manifest schema."""
2
+
3
+ from open_range.manifest_schema import (
4
+ BusinessProcess,
5
+ Company,
6
+ CredentialPolicy,
7
+ DataAsset,
8
+ Department,
9
+ Difficulty,
10
+ ExposurePolicy,
11
+ FirewallRule,
12
+ Host,
13
+ Manifest,
14
+ MonitoringCoverage,
15
+ NPCProfile,
16
+ Network,
17
+ OperationalContext,
18
+ TechStack,
19
+ Topology,
20
+ TrustRelationship,
21
+ User,
22
+ load_manifest,
23
+ )
24
+
25
+ __all__ = [
26
+ "BusinessProcess",
27
+ "Company",
28
+ "CredentialPolicy",
29
+ "DataAsset",
30
+ "Department",
31
+ "Difficulty",
32
+ "ExposurePolicy",
33
+ "FirewallRule",
34
+ "Host",
35
+ "Manifest",
36
+ "MonitoringCoverage",
37
+ "NPCProfile",
38
+ "Network",
39
+ "OperationalContext",
40
+ "TechStack",
41
+ "Topology",
42
+ "TrustRelationship",
43
+ "User",
44
+ "load_manifest",
45
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/open_range/__init__.py CHANGED
@@ -1,8 +1,13 @@
1
  """OpenRange public package surface."""
2
 
3
- from open_range.client.client import OpenRangeEnv
4
- from open_range.models import RangeAction, RangeObservation, RangeState
5
- from open_range.server.environment import RangeEnvironment
 
 
 
 
 
6
 
7
  __all__ = [
8
  "OpenRangeEnv",
@@ -11,3 +16,28 @@ __all__ = [
11
  "RangeObservation",
12
  "RangeState",
13
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """OpenRange public package surface."""
2
 
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ if TYPE_CHECKING:
8
+ from open_range.client.client import OpenRangeEnv
9
+ from open_range.models import RangeAction, RangeObservation, RangeState
10
+ from open_range.server.environment import RangeEnvironment
11
 
12
  __all__ = [
13
  "OpenRangeEnv",
 
16
  "RangeObservation",
17
  "RangeState",
18
  ]
19
+
20
+
21
+ def __getattr__(name: str) -> Any:
22
+ """Resolve public exports lazily so light CLIs avoid heavy imports."""
23
+ if name == "OpenRangeEnv":
24
+ from open_range.client.client import OpenRangeEnv
25
+
26
+ return OpenRangeEnv
27
+ if name == "RangeAction":
28
+ from open_range.models import RangeAction
29
+
30
+ return RangeAction
31
+ if name == "RangeObservation":
32
+ from open_range.models import RangeObservation
33
+
34
+ return RangeObservation
35
+ if name == "RangeState":
36
+ from open_range.models import RangeState
37
+
38
+ return RangeState
39
+ if name == "RangeEnvironment":
40
+ from open_range.server.environment import RangeEnvironment
41
+
42
+ return RangeEnvironment
43
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
src/open_range/lint.py CHANGED
@@ -17,7 +17,7 @@ import sys
17
  from pathlib import Path
18
  from typing import Any
19
 
20
- from manifests.schema import Manifest, load_manifest
21
 
22
 
23
  # ---------------------------------------------------------------------------
 
17
  from pathlib import Path
18
  from typing import Any
19
 
20
+ from open_range.manifest_schema import Manifest, load_manifest
21
 
22
 
23
  # ---------------------------------------------------------------------------
src/open_range/manifest_schema.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for OpenRange manifest validation.
2
+
3
+ A manifest declares the *world* of a cyber range: the fictional company, its
4
+ people, data, business processes, network topology, and the vulnerability
5
+ families the Builder may plant. This module lives under ``open_range`` so
6
+ installed tooling such as ``python -m open_range.lint`` does not depend on the
7
+ caller also importing the top-level ``manifests`` package.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+ from typing import Any, Literal
14
+
15
+ import yaml
16
+ from pydantic import BaseModel, Field, model_validator
17
+
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Company context -- gives the Builder a story to build from
21
+ # ---------------------------------------------------------------------------
22
+
23
+
24
+ class Company(BaseModel):
25
+ """The fictional company that owns this range."""
26
+
27
+ name: str = Field(..., description="Company name, e.g. 'Meridian Health Partners'")
28
+ domain: str = Field(..., description="Internal FQDN domain, e.g. 'meridianhealth.local'")
29
+ industry: str = Field(..., description="Industry vertical, e.g. 'healthcare'")
30
+ description: str = Field(
31
+ ..., description="What the company does -- 2-3 sentences the Builder uses for narrative"
32
+ )
33
+
34
+
35
+ class Department(BaseModel):
36
+ """An organizational unit with its own access profile."""
37
+
38
+ name: str
39
+ description: str = ""
40
+ hosts_accessed: list[str] = Field(
41
+ default_factory=list,
42
+ description="Hostnames staff in this dept routinely access",
43
+ )
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # People: users and NPC personas
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
+ class User(BaseModel):
52
+ """A user account that exists in the range (LDAP/local)."""
53
+
54
+ username: str
55
+ full_name: str = ""
56
+ department: str = ""
57
+ role: str = ""
58
+ email: str = ""
59
+ hosts: list[str] = Field(
60
+ default_factory=list,
61
+ description="Hosts where this user has an account",
62
+ )
63
+
64
+
65
+ class NPCProfile(BaseModel, extra="allow"):
66
+ """An NPC persona the Builder should generate traffic and behavior for."""
67
+
68
+ username: str = Field(..., description="Must reference a User.username")
69
+ security_awareness: float = Field(
70
+ default=0.5,
71
+ ge=0.0,
72
+ le=1.0,
73
+ description="0=clueless, 1=CISO-level. Determines susceptibility to social engineering",
74
+ )
75
+ daily_activities: list[str] = Field(
76
+ default_factory=list,
77
+ description="What this person does all day -- generates realistic traffic patterns",
78
+ )
79
+ susceptibility: dict[str, float] = Field(
80
+ default_factory=dict,
81
+ description="Attack-type -> probability of falling for it, e.g. {'phishing_email': 0.7}",
82
+ )
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # Data and business processes -- tells the Builder what to protect
87
+ # ---------------------------------------------------------------------------
88
+
89
+
90
+ class DataAsset(BaseModel):
91
+ """A piece of sensitive data that exists somewhere in the range."""
92
+
93
+ name: str = Field(..., description="Human name, e.g. 'Patient referral records'")
94
+ classification: Literal["public", "internal", "confidential", "restricted"] = "internal"
95
+ host: str = Field(..., description="Host where this data lives")
96
+ location: str = Field(
97
+ default="", description="Path or service, e.g. '/srv/shares/hr' or 'mysql:app_db.patients'"
98
+ )
99
+ description: str = ""
100
+
101
+
102
+ class BusinessProcess(BaseModel):
103
+ """A cross-service data flow the Builder should keep realistic."""
104
+
105
+ name: str
106
+ description: str = ""
107
+ data_flow: list[str] = Field(
108
+ default_factory=list,
109
+ description="Ordered list of host:service hops, e.g. ['web:nginx', 'db:mysql', 'siem:rsyslog']",
110
+ )
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Infrastructure realism -- software, config, and operational context
115
+ # ---------------------------------------------------------------------------
116
+
117
+
118
+ class TechStack(BaseModel, extra="allow"):
119
+ """Specific software versions and known technical debt.
120
+
121
+ Accepts both flat string fields and nested dicts for flexibility.
122
+ """
123
+
124
+ known_debt: list[str] = Field(default_factory=list)
125
+
126
+
127
+ class CredentialPolicy(BaseModel, extra="allow"):
128
+ """How credentials work (and fail) in this organization.
129
+
130
+ Accepts flexible formats: flat strings or structured dicts.
131
+ """
132
+
133
+ enforcement_gaps: list[Any] = Field(default_factory=list)
134
+
135
+
136
+ class MonitoringCoverage(BaseModel, extra="allow"):
137
+ """What Blue can actually see -- and the blind spots Red can exploit."""
138
+
139
+ logged: list[Any] = Field(default_factory=list)
140
+ blind_spots: list[str] = Field(default_factory=list)
141
+ alert_rules: list[Any] = Field(default_factory=list)
142
+ retention_days: int = Field(default=90)
143
+
144
+
145
+ class TrustRelationship(BaseModel, extra="allow"):
146
+ """Who trusts whom -- the social graph Red can exploit for lateral movement.
147
+
148
+ Accepts 'from'/'to' (YAML-friendly) or 'source'/'target' field names.
149
+ """
150
+
151
+ type: str = Field(
152
+ ..., description="Relationship type: reports_to, delegates_access, shares_credentials, trusts_email"
153
+ )
154
+
155
+ # Accept either naming convention
156
+ source: str = ""
157
+ target: str = ""
158
+
159
+ # 'from' and 'to' are Python keywords, handle via model_validator
160
+ @model_validator(mode="before")
161
+ @classmethod
162
+ def _normalize_field_names(cls, data: Any) -> Any:
163
+ if isinstance(data, dict):
164
+ # Normalize various naming conventions to source/target
165
+ for src_key in ("from", "from_user"):
166
+ if src_key in data and not data.get("source"):
167
+ data["source"] = data.pop(src_key)
168
+ for tgt_key in ("to", "to_user"):
169
+ if tgt_key in data and not data.get("target"):
170
+ data["target"] = data.pop(tgt_key)
171
+ # Accept 'description', 'detail', or 'context' as the explanation field
172
+ for alt in ("description", "detail"):
173
+ if alt in data and "context" not in data:
174
+ data["context"] = data.pop(alt)
175
+ return data
176
+
177
+ context: str = Field(
178
+ default="", description="Why this trust exists"
179
+ )
180
+
181
+
182
+ class OperationalContext(BaseModel, extra="allow"):
183
+ """How this company actually operates day-to-day.
184
+
185
+ Accepts flexible formats for all fields.
186
+ """
187
+
188
+ recent_incidents: list[Any] = Field(default_factory=list)
189
+ audit_findings: list[Any] = Field(default_factory=list)
190
+ maintenance_windows: list[Any] | Any = Field(default_factory=list)
191
+ vendor_access: list[Any] = Field(default_factory=list)
192
+ recent_changes: list[Any] = Field(default_factory=list)
193
+
194
+ @model_validator(mode="before")
195
+ @classmethod
196
+ def _normalize_fields(cls, data: Any) -> Any:
197
+ if isinstance(data, dict):
198
+ # Accept 'compliance' as alias for 'compliance_frameworks'
199
+ if "compliance" in data and "compliance_frameworks" not in data:
200
+ data["compliance_frameworks"] = data.pop("compliance")
201
+ # Normalize maintenance_windows dict to list
202
+ mw = data.get("maintenance_windows")
203
+ if isinstance(mw, dict):
204
+ data["maintenance_windows"] = [f"{k}: {v}" for k, v in mw.items()]
205
+ elif isinstance(mw, str):
206
+ data["maintenance_windows"] = [mw]
207
+ # Normalize list-of-dicts to list-of-strings where needed
208
+ for field in ("recent_incidents", "vendor_access", "recent_changes"):
209
+ items = data.get(field, [])
210
+ if items and isinstance(items[0], dict):
211
+ data[field] = [
212
+ item.get("description", "") or " | ".join(f"{k}: {v}" for k, v in item.items())
213
+ for item in items
214
+ ]
215
+ return data
216
+
217
+ compliance_frameworks: list[str] = Field(default_factory=list)
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Topology primitives
222
+ # ---------------------------------------------------------------------------
223
+
224
+
225
+ class ExposurePolicy(BaseModel):
226
+ """Per-host exposure configuration."""
227
+
228
+ level: Literal["public", "hidden", "authenticated", "misconfigured"] = "public"
229
+ auth_required: bool = False
230
+ notes: str = ""
231
+
232
+
233
+ class Host(BaseModel):
234
+ """A single host (container) in the range topology."""
235
+
236
+ name: str = Field(..., description="Unique hostname, e.g. 'web', 'db'")
237
+ zone: str = Field(..., description="Network zone this host belongs to")
238
+ purpose: str = Field(
239
+ default="",
240
+ description="Why this host exists in the company, e.g. 'Customer-facing referral portal'",
241
+ )
242
+ hostname: str = Field(
243
+ default="",
244
+ description="FQDN in the company domain, e.g. 'portal.meridianhealth.local'",
245
+ )
246
+ services: list[str] = Field(
247
+ default_factory=list,
248
+ description="Services running on this host, e.g. ['nginx', 'php', 'sshd']",
249
+ )
250
+ connects_to: list[str] = Field(
251
+ default_factory=list,
252
+ description="Hostnames this host initiates connections to",
253
+ )
254
+ os: str = Field(
255
+ default="ubuntu:22.04",
256
+ description="Base OS image for the container",
257
+ )
258
+ exposure: ExposurePolicy = Field(default_factory=ExposurePolicy)
259
+
260
+
261
+ class Network(BaseModel):
262
+ """A named network zone with an optional CIDR."""
263
+
264
+ name: str = Field(..., description="Zone name, e.g. 'dmz', 'internal'")
265
+ cidr: str | None = Field(
266
+ default=None,
267
+ description="Subnet CIDR, e.g. '10.0.1.0/24'",
268
+ )
269
+
270
+
271
+ class FirewallRule(BaseModel):
272
+ """A directional firewall rule between two zones."""
273
+
274
+ action: Literal["allow", "deny"] = Field(
275
+ ..., description="Whether traffic is allowed or denied"
276
+ )
277
+ from_zone: str = Field(..., description="Source zone")
278
+ to_zone: str = Field(..., description="Destination zone")
279
+ ports: list[int] = Field(
280
+ default_factory=list,
281
+ description="TCP ports this rule applies to (empty = all ports)",
282
+ )
283
+
284
+
285
+ class Topology(BaseModel):
286
+ """Full network topology: hosts, networks, and firewall rules."""
287
+
288
+ hosts: list[Host] = Field(..., min_length=1)
289
+ networks: list[Network] = Field(..., min_length=1)
290
+ firewall_rules: list[FirewallRule] = Field(default_factory=list)
291
+
292
+ @model_validator(mode="after")
293
+ def _hosts_reference_valid_zones(self) -> "Topology":
294
+ zone_names = {n.name for n in self.networks}
295
+ for host in self.hosts:
296
+ if host.zone not in zone_names:
297
+ raise ValueError(
298
+ f"Host '{host.name}' references zone '{host.zone}' "
299
+ f"which is not defined in networks: {sorted(zone_names)}"
300
+ )
301
+ return self
302
+
303
+ @model_validator(mode="after")
304
+ def _firewall_rules_reference_valid_zones(self) -> "Topology":
305
+ zone_names = {n.name for n in self.networks}
306
+ for rule in self.firewall_rules:
307
+ for attr in ("from_zone", "to_zone"):
308
+ zone = getattr(rule, attr)
309
+ if zone not in zone_names:
310
+ raise ValueError(
311
+ f"Firewall rule references zone '{zone}' "
312
+ f"which is not defined in networks: {sorted(zone_names)}"
313
+ )
314
+ return self
315
+
316
+
317
+ # ---------------------------------------------------------------------------
318
+ # Difficulty envelope
319
+ # ---------------------------------------------------------------------------
320
+
321
+
322
+ class Difficulty(BaseModel):
323
+ """Difficulty constraints the Validator enforces on generated ranges."""
324
+
325
+ max_steps: int = Field(
326
+ ..., gt=0, description="Maximum golden-path steps allowed"
327
+ )
328
+ min_vulns: int = Field(
329
+ default=1, ge=1, description="Minimum planted vulnerabilities"
330
+ )
331
+ max_vulns: int = Field(
332
+ default=3, ge=1, description="Maximum planted vulnerabilities"
333
+ )
334
+
335
+ @model_validator(mode="after")
336
+ def _min_le_max(self) -> "Difficulty":
337
+ if self.min_vulns > self.max_vulns:
338
+ raise ValueError(
339
+ f"min_vulns ({self.min_vulns}) must be <= max_vulns ({self.max_vulns})"
340
+ )
341
+ return self
342
+
343
+
344
+ # ---------------------------------------------------------------------------
345
+ # Top-level manifest
346
+ # ---------------------------------------------------------------------------
347
+
348
+
349
+ class Manifest(BaseModel):
350
+ """Top-level range manifest -- the contract between humans and the Builder.
351
+
352
+ Required fields define the network and vuln envelope. Optional fields
353
+ (company, users, NPCs, data, processes) provide narrative context that
354
+ lets the Builder generate realistic, interconnected scenarios.
355
+ """
356
+
357
+ name: str = Field(..., description="Human-readable range name")
358
+ tier: int = Field(..., ge=1, le=5, description="Complexity tier (1-5)")
359
+
360
+ # Company context (optional but strongly encouraged)
361
+ company: Company | None = None
362
+ departments: list[Department] = Field(default_factory=list)
363
+ users: list[User] = Field(default_factory=list)
364
+ npc_personas: list[NPCProfile] = Field(default_factory=list)
365
+ data_inventory: list[DataAsset] = Field(default_factory=list)
366
+ business_processes: list[BusinessProcess] = Field(default_factory=list)
367
+
368
+ # Infrastructure realism (optional -- enriches Builder context)
369
+ tech_stack: TechStack | Any | None = None
370
+ credential_policy: CredentialPolicy | Any | None = None
371
+ monitoring_coverage: MonitoringCoverage | Any | None = None
372
+ trust_relationships: list[TrustRelationship] = Field(default_factory=list)
373
+ operational_context: OperationalContext | Any | None = None
374
+
375
+ # Core topology and vuln envelope
376
+ topology: Topology
377
+ bug_families: list[str] = Field(
378
+ ...,
379
+ min_length=1,
380
+ description="Vulnerability classes the Builder may plant (LLM generates details from these type names)",
381
+ )
382
+ task_families: list[str] = Field(
383
+ default=["exploit", "investigate", "patch", "report"],
384
+ description="Task types agents may be asked to perform",
385
+ )
386
+ difficulty: Difficulty
387
+
388
+
389
+ # ---------------------------------------------------------------------------
390
+ # Loader
391
+ # ---------------------------------------------------------------------------
392
+
393
+
394
+ def load_manifest(path: str | Path) -> Manifest:
395
+ """Load a YAML manifest file and return a validated ``Manifest``.
396
+
397
+ Raises ``FileNotFoundError`` if the file does not exist.
398
+ Raises ``pydantic.ValidationError`` if the content is invalid.
399
+ """
400
+ path = Path(path)
401
+ if not path.exists():
402
+ raise FileNotFoundError(f"Manifest not found: {path}")
403
+ with open(path) as fh:
404
+ raw = yaml.safe_load(fh)
405
+ return Manifest(**raw)
src/open_range/server/environment.py CHANGED
@@ -181,7 +181,9 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
181
  elif self._get_docker() is not None:
182
  self._execution_mode = "docker"
183
  else:
184
- self._execution_mode = "subprocess"
 
 
185
 
186
  # -----------------------------------------------------------------
187
  # Docker helpers
@@ -926,6 +928,20 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
926
  snapshot.compose = compose
927
  return True
928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  def _refresh_npc_traffic_log(self) -> None:
930
  """Pull latest NPC activity from the manager into the traffic log."""
931
  if self._npc_manager is not None:
@@ -1433,6 +1449,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
1433
  # Runtime-backed episodes boot a fresh project per reset. Manual/mock
1434
  # snapshots still use direct artifact application.
1435
  activated = self._activate_runtime_snapshot(self._snapshot, episode_id=eid)
 
1436
 
1437
  # Start services BEFORE applying snapshot data so that daemons
1438
  # (MySQL, slapd, etc.) are ready to receive SQL / LDIF payloads.
 
181
  elif self._get_docker() is not None:
182
  self._execution_mode = "docker"
183
  else:
184
+ # Missing Docker must not silently change the environment
185
+ # semantics to host-shell execution. Keep "docker" mode, which degrades to mock output.
186
+ self._execution_mode = "docker"
187
 
188
  # -----------------------------------------------------------------
189
  # Docker helpers
 
928
  snapshot.compose = compose
929
  return True
930
 
931
+ def _ensure_clean_reset_path(self, *, activated: bool) -> None:
932
+ """Reject live Docker resets that would overlay onto mutable containers."""
933
+ if activated or self._execution_mode != "docker":
934
+ return
935
+ if self._docker_available is False:
936
+ return
937
+ if self._get_docker() is None:
938
+ return
939
+ raise RuntimeError(
940
+ "Direct docker snapshot reset is disabled because it overlays mutable "
941
+ "container state across episodes. Use ManagedSnapshotRuntime or "
942
+ "explicitly opt into execution_mode='subprocess'."
943
+ )
944
+
945
  def _refresh_npc_traffic_log(self) -> None:
946
  """Pull latest NPC activity from the manager into the traffic log."""
947
  if self._npc_manager is not None:
 
1449
  # Runtime-backed episodes boot a fresh project per reset. Manual/mock
1450
  # snapshots still use direct artifact application.
1451
  activated = self._activate_runtime_snapshot(self._snapshot, episode_id=eid)
1452
+ self._ensure_clean_reset_path(activated=activated)
1453
 
1454
  # Start services BEFORE applying snapshot data so that daemons
1455
  # (MySQL, slapd, etc.) are ready to receive SQL / LDIF payloads.
tests/test_environment.py CHANGED
@@ -1,5 +1,7 @@
1
  """Tests for RangeEnvironment lifecycle — all run without Docker."""
2
 
 
 
3
  import pytest
4
 
5
  from open_range.protocols import (
@@ -72,6 +74,21 @@ class TestReset:
72
  # In mock mode service health is unknown, but hosts should be tracked.
73
  assert set(env.state.services_status.keys()) == {"attacker", "siem"}
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  class TestTargetResolution:
77
  """Target selection should honor manifest-compiled metadata."""
 
1
  """Tests for RangeEnvironment lifecycle — all run without Docker."""
2
 
3
+ from unittest.mock import patch
4
+
5
  import pytest
6
 
7
  from open_range.protocols import (
 
74
  # In mock mode service health is unknown, but hosts should be tracked.
75
  assert set(env.state.services_status.keys()) == {"attacker", "siem"}
76
 
77
+ def test_auto_without_docker_uses_mock_docker_mode(self):
+ # With execution_mode="auto" and no Docker client, the environment must
+ # report "docker" mode and return mock command output.
78
+ # Stand-in for ``_get_docker``: marks Docker unavailable and returns no client.
+ def fake_get_docker(self):
79
+ self._docker_available = False
80
+ return None
81
+
82
+ with patch.object(RangeEnvironment, "_get_docker", fake_get_docker):
83
+ env = RangeEnvironment(docker_available=None, execution_mode="auto")
84
+
85
+ env.reset(snapshot=_MINIMAL_SNAPSHOT)
86
+ obs = env.step(RangeAction(command="printf test", mode="red"))
87
+
88
+ # The mode stays "docker"; the command output carries the mock marker.
+ assert env._execution_mode == "docker"
89
+ assert obs.stderr == ""
90
+ assert "[mock] executed on attacker" in obs.stdout
91
+
92
 
93
  class TestTargetResolution:
94
  """Target selection should honor manifest-compiled metadata."""
tests/test_lint.py CHANGED
@@ -7,12 +7,11 @@ from pathlib import Path
7
  import shutil
8
  import subprocess
9
  import sys
10
- import tomllib
11
 
12
  import pytest
13
  import yaml
14
 
15
- from manifests.schema import Manifest, load_manifest
16
  from open_range.lint import lint_file, lint_manifest
17
 
18
 
@@ -291,15 +290,12 @@ class TestLintFile:
291
 
292
 
293
  class TestPackagingAndInvocation:
294
- def test_pyproject_includes_manifests_package(self):
295
- pyproject = tomllib.loads((ROOT / "pyproject.toml").read_text())
296
- packages = pyproject["tool"]["setuptools"]["packages"]
297
- assert "manifests" in packages
298
 
299
  def test_lint_module_runs_outside_repo_with_packaged_layout(self, tmp_path):
300
  site_root = tmp_path / "site"
301
  shutil.copytree(ROOT / "src" / "open_range", site_root / "open_range")
302
- shutil.copytree(ROOT / "src" / "manifests", site_root / "manifests")
303
 
304
  outside = tmp_path / "outside"
305
  outside.mkdir()
 
7
  import shutil
8
  import subprocess
9
  import sys
 
10
 
11
  import pytest
12
  import yaml
13
 
14
+ from open_range.manifest_schema import Manifest, load_manifest
15
  from open_range.lint import lint_file, lint_manifest
16
 
17
 
 
290
 
291
 
292
  class TestPackagingAndInvocation:
293
def test_open_range_package_contains_manifest_schema(self):
    """The manifest schema module must exist inside ``src/open_range``."""
    schema_module = ROOT / "src" / "open_range" / "manifest_schema.py"
    assert schema_module.exists()
 
 
295
 
296
  def test_lint_module_runs_outside_repo_with_packaged_layout(self, tmp_path):
297
  site_root = tmp_path / "site"
298
  shutil.copytree(ROOT / "src" / "open_range", site_root / "open_range")
 
299
 
300
  outside = tmp_path / "outside"
301
  outside.mkdir()
tests/test_runtime.py CHANGED
@@ -483,6 +483,22 @@ class TestManagedSnapshotRuntime:
483
 
484
 
485
  class TestEnvironmentRuntimeIntegration:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  def test_reset_uses_managed_runtime_snapshot(self, tier1_manifest, tmp_path):
487
  runtime = ManagedSnapshotRuntime(
488
  manifest=tier1_manifest,
 
483
 
484
 
485
  class TestEnvironmentRuntimeIntegration:
486
def test_reset_rejects_direct_live_docker_overlay(self):
    """Resetting straight onto a live Docker client must raise ``RuntimeError``."""
    host_names = ["attacker", "siem", "web"]
    overlay_snapshot = SnapshotSpec(
        topology={"hosts": host_names},
        compose={"services": {host: {} for host in host_names}},
        files={"web:/var/www/html/index.php": "<?php echo 'hi'; ?>"},
        task={"red_briefing": "Go.", "blue_briefing": "Watch."},
    )
    live_env = RangeEnvironment(docker_available=True, execution_mode="docker")
    # Pretend a Docker client exists so the reset takes the live-docker path.
    live_env._get_docker = lambda: object()  # type: ignore[method-assign]

    try:
        with pytest.raises(
            RuntimeError, match="Direct docker snapshot reset is disabled"
        ):
            live_env.reset(snapshot=overlay_snapshot)
    finally:
        # Always release the environment, whether or not the reset raised.
        live_env.close()
501
+
502
  def test_reset_uses_managed_runtime_snapshot(self, tier1_manifest, tmp_path):
503
  runtime = ManagedSnapshotRuntime(
504
  manifest=tier1_manifest,