Spaces:

abrown31
/

open-range

Runtime error

App Files Files Community

Lars Talian committed on Mar 8

Commit

6e4e622

unverified ·

1 Parent(s): 202e768

Fix lint packaging and fail closed runtime paths (#93)

Browse files

Files changed (8) hide show

src/manifests/schema.py +45 -397
src/open_range/__init__.py +33 -3
src/open_range/lint.py +1 -1
src/open_range/manifest_schema.py +405 -0
src/open_range/server/environment.py +18 -1
tests/test_environment.py +17 -0
tests/test_lint.py +3 -7
tests/test_runtime.py +16 -0

src/manifests/schema.py CHANGED Viewed

@@ -1,397 +1,45 @@
-"""Pydantic models for OpenRange manifest validation.
-A manifest declares the *world* of a cyber range: the fictional company, its
-people, data, business processes, network topology, and the vulnerability
-families the Builder may plant.  This rich context lets the Builder LLM
-generate scenarios where vulns, NPCs, and data flows make narrative sense --
-not just "a web server with SQLi" but "a patient-referral portal where the
-search endpoint trusts user input because the original dev left six months ago
-and nobody reviewed it since."
-"""
-from __future__ import annotations
-from pathlib import Path
-from typing import Any, Literal
-import yaml
-from pydantic import BaseModel, Field, model_validator
-# ---------------------------------------------------------------------------
-# Company context -- gives the Builder a story to build from
-# ---------------------------------------------------------------------------
-class Company(BaseModel):
-    """The fictional company that owns this range."""
-    name: str = Field(..., description="Company name, e.g. 'Meridian Health Partners'")
-    domain: str = Field(..., description="Internal FQDN domain, e.g. 'meridianhealth.local'")
-    industry: str = Field(..., description="Industry vertical, e.g. 'healthcare'")
-    description: str = Field(
-        ..., description="What the company does -- 2-3 sentences the Builder uses for narrative"
-    )
-class Department(BaseModel):
-    """An organizational unit with its own access profile."""
-    name: str
-    description: str = ""
-    hosts_accessed: list[str] = Field(
-        default_factory=list,
-        description="Hostnames staff in this dept routinely access",
-    )
-# ---------------------------------------------------------------------------
-# People: users and NPC personas
-# ---------------------------------------------------------------------------
-class User(BaseModel):
-    """A user account that exists in the range (LDAP/local)."""
-    username: str
-    full_name: str = ""
-    department: str = ""
-    role: str = ""
-    email: str = ""
-    hosts: list[str] = Field(
-        default_factory=list,
-        description="Hosts where this user has an account",
-    )
-class NPCProfile(BaseModel, extra="allow"):
-    """An NPC persona the Builder should generate traffic and behavior for."""
-    username: str = Field(..., description="Must reference a User.username")
-    security_awareness: float = Field(
-        default=0.5, ge=0.0, le=1.0,
-        description="0=clueless, 1=CISO-level. Determines susceptibility to social engineering",
-    )
-    daily_activities: list[str] = Field(
-        default_factory=list,
-        description="What this person does all day -- generates realistic traffic patterns",
-    )
-    susceptibility: dict[str, float] = Field(
-        default_factory=dict,
-        description="Attack-type -> probability of falling for it, e.g. {'phishing_email': 0.7}",
-    )
-# ---------------------------------------------------------------------------
-# Data and business processes -- tells the Builder what to protect
-# ---------------------------------------------------------------------------
-class DataAsset(BaseModel):
-    """A piece of sensitive data that exists somewhere in the range."""
-    name: str = Field(..., description="Human name, e.g. 'Patient referral records'")
-    classification: Literal["public", "internal", "confidential", "restricted"] = "internal"
-    host: str = Field(..., description="Host where this data lives")
-    location: str = Field(
-        default="", description="Path or service, e.g. '/srv/shares/hr' or 'mysql:app_db.patients'"
-    )
-    description: str = ""
-class BusinessProcess(BaseModel):
-    """A cross-service data flow the Builder should keep realistic."""
-    name: str
-    description: str = ""
-    data_flow: list[str] = Field(
-        default_factory=list,
-        description="Ordered list of host:service hops, e.g. ['web:nginx', 'db:mysql', 'siem:rsyslog']",
-    )
-# ---------------------------------------------------------------------------
-# Infrastructure realism -- software, config, and operational context
-# ---------------------------------------------------------------------------
-class TechStack(BaseModel, extra="allow"):
-    """Specific software versions and known technical debt.
-    Accepts both flat string fields and nested dicts for flexibility.
-    """
-    known_debt: list[str] = Field(default_factory=list)
-class CredentialPolicy(BaseModel, extra="allow"):
-    """How credentials work (and fail) in this organization.
-    Accepts flexible formats: flat strings or structured dicts.
-    """
-    enforcement_gaps: list[Any] = Field(default_factory=list)
-class MonitoringCoverage(BaseModel, extra="allow"):
-    """What Blue can actually see — and the blind spots Red can exploit."""
-    logged: list[Any] = Field(default_factory=list)
-    blind_spots: list[str] = Field(default_factory=list)
-    alert_rules: list[Any] = Field(default_factory=list)
-    retention_days: int = Field(default=90)
-class TrustRelationship(BaseModel, extra="allow"):
-    """Who trusts whom — the social graph Red can exploit for lateral movement.
-    Accepts 'from'/'to' (YAML-friendly) or 'source'/'target' field names.
-    """
-    type: str = Field(
-        ..., description="Relationship type: reports_to, delegates_access, shares_credentials, trusts_email"
-    )
-    # Accept either naming convention
-    source: str = ""
-    target: str = ""
-    # 'from' and 'to' are Python keywords, handle via model_validator
-    @model_validator(mode="before")
-    @classmethod
-    def _normalize_field_names(cls, data: Any) -> Any:
-        if isinstance(data, dict):
-            # Normalize various naming conventions to source/target
-            for src_key in ("from", "from_user"):
-                if src_key in data and not data.get("source"):
-                    data["source"] = data.pop(src_key)
-            for tgt_key in ("to", "to_user"):
-                if tgt_key in data and not data.get("target"):
-                    data["target"] = data.pop(tgt_key)
-            # Accept 'description', 'detail', or 'context' as the explanation field
-            for alt in ("description", "detail"):
-                if alt in data and "context" not in data:
-                    data["context"] = data.pop(alt)
-        return data
-    context: str = Field(
-        default="", description="Why this trust exists"
-    )
-class OperationalContext(BaseModel, extra="allow"):
-    """How this company actually operates day-to-day.
-    Accepts flexible formats for all fields.
-    """
-    recent_incidents: list[Any] = Field(default_factory=list)
-    audit_findings: list[Any] = Field(default_factory=list)
-    maintenance_windows: list[Any] | Any = Field(default_factory=list)
-    vendor_access: list[Any] = Field(default_factory=list)
-    recent_changes: list[Any] = Field(default_factory=list)
-    @model_validator(mode="before")
-    @classmethod
-    def _normalize_fields(cls, data: Any) -> Any:
-        if isinstance(data, dict):
-            # Accept 'compliance' as alias for 'compliance_frameworks'
-            if "compliance" in data and "compliance_frameworks" not in data:
-                data["compliance_frameworks"] = data.pop("compliance")
-            # Normalize maintenance_windows dict to list
-            mw = data.get("maintenance_windows")
-            if isinstance(mw, dict):
-                data["maintenance_windows"] = [f"{k}: {v}" for k, v in mw.items()]
-            elif isinstance(mw, str):
-                data["maintenance_windows"] = [mw]
-            # Normalize list-of-dicts to list-of-strings where needed
-            for field in ("recent_incidents", "vendor_access", "recent_changes"):
-                items = data.get(field, [])
-                if items and isinstance(items[0], dict):
-                    data[field] = [
-                        item.get("description", "") or " | ".join(f"{k}: {v}" for k, v in item.items())
-                        for item in items
-                    ]
-        return data
-    compliance_frameworks: list[str] = Field(default_factory=list)
-# ---------------------------------------------------------------------------
-# Topology primitives
-# ---------------------------------------------------------------------------
-class ExposurePolicy(BaseModel):
-    """Per-host exposure configuration."""
-    level: Literal["public", "hidden", "authenticated", "misconfigured"] = "public"
-    auth_required: bool = False
-    notes: str = ""
-class Host(BaseModel):
-    """A single host (container) in the range topology."""
-    name: str = Field(..., description="Unique hostname, e.g. 'web', 'db'")
-    zone: str = Field(..., description="Network zone this host belongs to")
-    purpose: str = Field(
-        default="",
-        description="Why this host exists in the company, e.g. 'Customer-facing referral portal'",
-    )
-    hostname: str = Field(
-        default="",
-        description="FQDN in the company domain, e.g. 'portal.meridianhealth.local'",
-    )
-    services: list[str] = Field(
-        default_factory=list,
-        description="Services running on this host, e.g. ['nginx', 'php', 'sshd']",
-    )
-    connects_to: list[str] = Field(
-        default_factory=list,
-        description="Hostnames this host initiates connections to",
-    )
-    os: str = Field(
-        default="ubuntu:22.04",
-        description="Base OS image for the container",
-    )
-    exposure: ExposurePolicy = Field(default_factory=ExposurePolicy)
-class Network(BaseModel):
-    """A named network zone with an optional CIDR."""
-    name: str = Field(..., description="Zone name, e.g. 'dmz', 'internal'")
-    cidr: str | None = Field(
-        default=None,
-        description="Subnet CIDR, e.g. '10.0.1.0/24'",
-    )
-class FirewallRule(BaseModel):
-    """A directional firewall rule between two zones."""
-    action: Literal["allow", "deny"] = Field(
-        ..., description="Whether traffic is allowed or denied"
-    )
-    from_zone: str = Field(..., description="Source zone")
-    to_zone: str = Field(..., description="Destination zone")
-    ports: list[int] = Field(
-        default_factory=list,
-        description="TCP ports this rule applies to (empty = all ports)",
-    )
-class Topology(BaseModel):
-    """Full network topology: hosts, networks, and firewall rules."""
-    hosts: list[Host] = Field(..., min_length=1)
-    networks: list[Network] = Field(..., min_length=1)
-    firewall_rules: list[FirewallRule] = Field(default_factory=list)
-    @model_validator(mode="after")
-    def _hosts_reference_valid_zones(self) -> "Topology":
-        zone_names = {n.name for n in self.networks}
-        for host in self.hosts:
-            if host.zone not in zone_names:
-                raise ValueError(
-                    f"Host '{host.name}' references zone '{host.zone}' "
-                    f"which is not defined in networks: {sorted(zone_names)}"
-                )
-        return self
-    @model_validator(mode="after")
-    def _firewall_rules_reference_valid_zones(self) -> "Topology":
-        zone_names = {n.name for n in self.networks}
-        for rule in self.firewall_rules:
-            for attr in ("from_zone", "to_zone"):
-                zone = getattr(rule, attr)
-                if zone not in zone_names:
-                    raise ValueError(
-                        f"Firewall rule references zone '{zone}' "
-                        f"which is not defined in networks: {sorted(zone_names)}"
-                    )
-        return self
-# ---------------------------------------------------------------------------
-# Difficulty envelope
-# ---------------------------------------------------------------------------
-class Difficulty(BaseModel):
-    """Difficulty constraints the Validator enforces on generated ranges."""
-    max_steps: int = Field(
-        ..., gt=0, description="Maximum golden-path steps allowed"
-    )
-    min_vulns: int = Field(
-        default=1, ge=1, description="Minimum planted vulnerabilities"
-    )
-    max_vulns: int = Field(
-        default=3, ge=1, description="Maximum planted vulnerabilities"
-    )
-    @model_validator(mode="after")
-    def _min_le_max(self) -> "Difficulty":
-        if self.min_vulns > self.max_vulns:
-            raise ValueError(
-                f"min_vulns ({self.min_vulns}) must be <= max_vulns ({self.max_vulns})"
-            )
-        return self
-# ---------------------------------------------------------------------------
-# Top-level manifest
-# ---------------------------------------------------------------------------
-class Manifest(BaseModel):
-    """Top-level range manifest -- the contract between humans and the Builder.
-    Required fields define the network and vuln envelope.  Optional fields
-    (company, users, NPCs, data, processes) provide narrative context that
-    lets the Builder generate realistic, interconnected scenarios.
-    """
-    name: str = Field(..., description="Human-readable range name")
-    tier: int = Field(..., ge=1, le=5, description="Complexity tier (1-5)")
-    # Company context (optional but strongly encouraged)
-    company: Company | None = None
-    departments: list[Department] = Field(default_factory=list)
-    users: list[User] = Field(default_factory=list)
-    npc_personas: list[NPCProfile] = Field(default_factory=list)
-    data_inventory: list[DataAsset] = Field(default_factory=list)
-    business_processes: list[BusinessProcess] = Field(default_factory=list)
-    # Infrastructure realism (optional — enriches Builder context)
-    tech_stack: TechStack | Any | None = None
-    credential_policy: CredentialPolicy | Any | None = None
-    monitoring_coverage: MonitoringCoverage | Any | None = None
-    trust_relationships: list[TrustRelationship] = Field(default_factory=list)
-    operational_context: OperationalContext | Any | None = None
-    # Core topology and vuln envelope
-    topology: Topology
-    bug_families: list[str] = Field(
-        ...,
-        min_length=1,
-        description="Vulnerability classes the Builder may plant (LLM generates details from these type names)",
-    )
-    task_families: list[str] = Field(
-        default=["exploit", "investigate", "patch", "report"],
-        description="Task types agents may be asked to perform",
-    )
-    difficulty: Difficulty
-# ---------------------------------------------------------------------------
-# Loader
-# ---------------------------------------------------------------------------
-def load_manifest(path: str | Path) -> Manifest:
-    """Load a YAML manifest file and return a validated ``Manifest``.
-    Raises ``FileNotFoundError`` if the file does not exist.
-    Raises ``pydantic.ValidationError`` if the content is invalid.
-    """
-    path = Path(path)
-    if not path.exists():
-        raise FileNotFoundError(f"Manifest not found: {path}")
-    with open(path) as fh:
-        raw = yaml.safe_load(fh)
-    return Manifest(**raw)

+"""Backward-compatible shim for the packaged manifest schema."""
+from open_range.manifest_schema import (
+    BusinessProcess,
+    Company,
+    CredentialPolicy,
+    DataAsset,
+    Department,
+    Difficulty,
+    ExposurePolicy,
+    FirewallRule,
+    Host,
+    Manifest,
+    MonitoringCoverage,
+    NPCProfile,
+    Network,
+    OperationalContext,
+    TechStack,
+    Topology,
+    TrustRelationship,
+    User,
+    load_manifest,
+)
+__all__ = [
+    "BusinessProcess",
+    "Company",
+    "CredentialPolicy",
+    "DataAsset",
+    "Department",
+    "Difficulty",
+    "ExposurePolicy",
+    "FirewallRule",
+    "Host",
+    "Manifest",
+    "MonitoringCoverage",
+    "NPCProfile",
+    "Network",
+    "OperationalContext",
+    "TechStack",
+    "Topology",
+    "TrustRelationship",
+    "User",
+    "load_manifest",
+]

src/open_range/__init__.py CHANGED Viewed

@@ -1,8 +1,13 @@
 """OpenRange public package surface."""
-from open_range.client.client import OpenRangeEnv
-from open_range.models import RangeAction, RangeObservation, RangeState
-from open_range.server.environment import RangeEnvironment
 __all__ = [
     "OpenRangeEnv",
@@ -11,3 +16,28 @@ __all__ = [
     "RangeObservation",
     "RangeState",
 ]

 """OpenRange public package surface."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from open_range.client.client import OpenRangeEnv
+    from open_range.models import RangeAction, RangeObservation, RangeState
+    from open_range.server.environment import RangeEnvironment
 __all__ = [
     "OpenRangeEnv",
     "RangeObservation",
     "RangeState",
 ]
+def __getattr__(name: str) -> Any:
+    """Resolve public exports lazily so light CLIs avoid heavy imports."""
+    if name == "OpenRangeEnv":
+        from open_range.client.client import OpenRangeEnv
+        return OpenRangeEnv
+    if name == "RangeAction":
+        from open_range.models import RangeAction
+        return RangeAction
+    if name == "RangeObservation":
+        from open_range.models import RangeObservation
+        return RangeObservation
+    if name == "RangeState":
+        from open_range.models import RangeState
+        return RangeState
+    if name == "RangeEnvironment":
+        from open_range.server.environment import RangeEnvironment
+        return RangeEnvironment
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

src/open_range/lint.py CHANGED Viewed

@@ -17,7 +17,7 @@ import sys
 from pathlib import Path
 from typing import Any
-from manifests.schema import Manifest, load_manifest
 # ---------------------------------------------------------------------------

 from pathlib import Path
 from typing import Any
+from open_range.manifest_schema import Manifest, load_manifest
 # ---------------------------------------------------------------------------

src/open_range/manifest_schema.py ADDED Viewed

	@@ -0,0 +1,405 @@

+"""Pydantic models for OpenRange manifest validation.
+A manifest declares the *world* of a cyber range: the fictional company, its
+people, data, business processes, network topology, and the vulnerability
+families the Builder may plant. This module lives under ``open_range`` so
+installed tooling such as ``python -m open_range.lint`` does not depend on the
+caller also importing the top-level ``manifests`` package.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Literal
+import yaml
+from pydantic import BaseModel, Field, model_validator
+# ---------------------------------------------------------------------------
+# Company context -- gives the Builder a story to build from
+# ---------------------------------------------------------------------------
+class Company(BaseModel):
+    """The fictional company that owns this range."""
+    name: str = Field(..., description="Company name, e.g. 'Meridian Health Partners'")
+    domain: str = Field(..., description="Internal FQDN domain, e.g. 'meridianhealth.local'")
+    industry: str = Field(..., description="Industry vertical, e.g. 'healthcare'")
+    description: str = Field(
+        ..., description="What the company does -- 2-3 sentences the Builder uses for narrative"
+    )
+class Department(BaseModel):
+    """An organizational unit with its own access profile."""
+    name: str
+    description: str = ""
+    hosts_accessed: list[str] = Field(
+        default_factory=list,
+        description="Hostnames staff in this dept routinely access",
+    )
+# ---------------------------------------------------------------------------
+# People: users and NPC personas
+# ---------------------------------------------------------------------------
+class User(BaseModel):
+    """A user account that exists in the range (LDAP/local)."""
+    username: str
+    full_name: str = ""
+    department: str = ""
+    role: str = ""
+    email: str = ""
+    hosts: list[str] = Field(
+        default_factory=list,
+        description="Hosts where this user has an account",
+    )
+class NPCProfile(BaseModel, extra="allow"):
+    """An NPC persona the Builder should generate traffic and behavior for."""
+    username: str = Field(..., description="Must reference a User.username")
+    security_awareness: float = Field(
+        default=0.5,
+        ge=0.0,
+        le=1.0,
+        description="0=clueless, 1=CISO-level. Determines susceptibility to social engineering",
+    )
+    daily_activities: list[str] = Field(
+        default_factory=list,
+        description="What this person does all day -- generates realistic traffic patterns",
+    )
+    susceptibility: dict[str, float] = Field(
+        default_factory=dict,
+        description="Attack-type -> probability of falling for it, e.g. {'phishing_email': 0.7}",
+    )
+# ---------------------------------------------------------------------------
+# Data and business processes -- tells the Builder what to protect
+# ---------------------------------------------------------------------------
+class DataAsset(BaseModel):
+    """A piece of sensitive data that exists somewhere in the range."""
+    name: str = Field(..., description="Human name, e.g. 'Patient referral records'")
+    classification: Literal["public", "internal", "confidential", "restricted"] = "internal"
+    host: str = Field(..., description="Host where this data lives")
+    location: str = Field(
+        default="", description="Path or service, e.g. '/srv/shares/hr' or 'mysql:app_db.patients'"
+    )
+    description: str = ""
+class BusinessProcess(BaseModel):
+    """A cross-service data flow the Builder should keep realistic."""
+    name: str
+    description: str = ""
+    data_flow: list[str] = Field(
+        default_factory=list,
+        description="Ordered list of host:service hops, e.g. ['web:nginx', 'db:mysql', 'siem:rsyslog']",
+    )
+# ---------------------------------------------------------------------------
+# Infrastructure realism -- software, config, and operational context
+# ---------------------------------------------------------------------------
+class TechStack(BaseModel, extra="allow"):
+    """Specific software versions and known technical debt.
+    Accepts both flat string fields and nested dicts for flexibility.
+    """
+    known_debt: list[str] = Field(default_factory=list)
+class CredentialPolicy(BaseModel, extra="allow"):
+    """How credentials work (and fail) in this organization.
+    Accepts flexible formats: flat strings or structured dicts.
+    """
+    enforcement_gaps: list[Any] = Field(default_factory=list)
+class MonitoringCoverage(BaseModel, extra="allow"):
+    """What Blue can actually see -- and the blind spots Red can exploit."""
+    logged: list[Any] = Field(default_factory=list)
+    blind_spots: list[str] = Field(default_factory=list)
+    alert_rules: list[Any] = Field(default_factory=list)
+    retention_days: int = Field(default=90)
+class TrustRelationship(BaseModel, extra="allow"):
+    """Who trusts whom -- the social graph Red can exploit for lateral movement.
+    Accepts 'from'/'to' (YAML-friendly) or 'source'/'target' field names.
+    """
+    type: str = Field(
+        ..., description="Relationship type: reports_to, delegates_access, shares_credentials, trusts_email"
+    )
+    # Accept either naming convention
+    source: str = ""
+    target: str = ""
+    # 'from' and 'to' are Python keywords, handle via model_validator
+    @model_validator(mode="before")
+    @classmethod
+    def _normalize_field_names(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            # Normalize various naming conventions to source/target
+            for src_key in ("from", "from_user"):
+                if src_key in data and not data.get("source"):
+                    data["source"] = data.pop(src_key)
+            for tgt_key in ("to", "to_user"):
+                if tgt_key in data and not data.get("target"):
+                    data["target"] = data.pop(tgt_key)
+            # Accept 'description', 'detail', or 'context' as the explanation field
+            for alt in ("description", "detail"):
+                if alt in data and "context" not in data:
+                    data["context"] = data.pop(alt)
+        return data
+    context: str = Field(
+        default="", description="Why this trust exists"
+    )
+class OperationalContext(BaseModel, extra="allow"):
+    """How this company actually operates day-to-day.
+    Accepts flexible formats for all fields; unknown keys are kept because
+    the model is declared with ``extra="allow"``.
+    """
+    recent_incidents: list[Any] = Field(default_factory=list)
+    audit_findings: list[Any] = Field(default_factory=list)
+    maintenance_windows: list[Any] | Any = Field(default_factory=list)
+    vendor_access: list[Any] = Field(default_factory=list)
+    recent_changes: list[Any] = Field(default_factory=list)
+    @model_validator(mode="before")
+    @classmethod
+    def _normalize_fields(cls, data: Any) -> Any:
+        """Coerce the looser accepted input shapes before field validation.
+        Non-dict input is returned untouched so pydantic reports the error.
+        """
+        if not isinstance(data, dict):
+            return data
+        # Work on a shallow copy so the caller's mapping is not modified.
+        data = dict(data)
+        # Accept 'compliance' as alias for 'compliance_frameworks'
+        if "compliance" in data and "compliance_frameworks" not in data:
+            data["compliance_frameworks"] = data.pop("compliance")
+        # Normalize a maintenance_windows dict or bare string to a list
+        mw = data.get("maintenance_windows")
+        if isinstance(mw, dict):
+            data["maintenance_windows"] = [f"{k}: {v}" for k, v in mw.items()]
+        elif isinstance(mw, str):
+            data["maintenance_windows"] = [mw]
+        # Normalize list-of-dicts to list-of-strings where needed
+        for field in ("recent_incidents", "vendor_access", "recent_changes"):
+            items = data.get(field)
+            # Only sequences are normalized here; any other shape is left for
+            # pydantic to reject instead of failing on ``items[0]``.
+            if not isinstance(items, (list, tuple)) or not items:
+                continue
+            if not isinstance(items[0], dict):
+                continue
+            # Non-dict entries in a mixed list pass through unchanged rather
+            # than raising AttributeError on ``.get``.
+            data[field] = [
+                (
+                    entry.get("description", "")
+                    or " | ".join(f"{k}: {v}" for k, v in entry.items())
+                )
+                if isinstance(entry, dict)
+                else entry
+                for entry in items
+            ]
+        return data
+    compliance_frameworks: list[str] = Field(default_factory=list)
+# ---------------------------------------------------------------------------
+# Topology primitives
+# ---------------------------------------------------------------------------
+class ExposurePolicy(BaseModel):
+    """Per-host exposure configuration."""
+    level: Literal["public", "hidden", "authenticated", "misconfigured"] = "public"
+    auth_required: bool = False
+    notes: str = ""
+class Host(BaseModel):
+    """A single host (container) in the range topology."""
+    name: str = Field(..., description="Unique hostname, e.g. 'web', 'db'")
+    zone: str = Field(..., description="Network zone this host belongs to")
+    purpose: str = Field(
+        default="",
+        description="Why this host exists in the company, e.g. 'Customer-facing referral portal'",
+    )
+    hostname: str = Field(
+        default="",
+        description="FQDN in the company domain, e.g. 'portal.meridianhealth.local'",
+    )
+    services: list[str] = Field(
+        default_factory=list,
+        description="Services running on this host, e.g. ['nginx', 'php', 'sshd']",
+    )
+    connects_to: list[str] = Field(
+        default_factory=list,
+        description="Hostnames this host initiates connections to",
+    )
+    os: str = Field(
+        default="ubuntu:22.04",
+        description="Base OS image for the container",
+    )
+    exposure: ExposurePolicy = Field(default_factory=ExposurePolicy)
+class Network(BaseModel):
+    """A named network zone with an optional CIDR."""
+    name: str = Field(..., description="Zone name, e.g. 'dmz', 'internal'")
+    cidr: str | None = Field(
+        default=None,
+        description="Subnet CIDR, e.g. '10.0.1.0/24'",
+    )
+class FirewallRule(BaseModel):
+    """A directional firewall rule between two zones."""
+    action: Literal["allow", "deny"] = Field(
+        ..., description="Whether traffic is allowed or denied"
+    )
+    from_zone: str = Field(..., description="Source zone")
+    to_zone: str = Field(..., description="Destination zone")
+    ports: list[int] = Field(
+        default_factory=list,
+        description="TCP ports this rule applies to (empty = all ports)",
+    )
+class Topology(BaseModel):
+    """Full network topology: hosts, networks, and firewall rules."""
+    hosts: list[Host] = Field(..., min_length=1)
+    networks: list[Network] = Field(..., min_length=1)
+    firewall_rules: list[FirewallRule] = Field(default_factory=list)
+    @model_validator(mode="after")
+    def _hosts_reference_valid_zones(self) -> "Topology":
+        zone_names = {n.name for n in self.networks}
+        for host in self.hosts:
+            if host.zone not in zone_names:
+                raise ValueError(
+                    f"Host '{host.name}' references zone '{host.zone}' "
+                    f"which is not defined in networks: {sorted(zone_names)}"
+                )
+        return self
+    @model_validator(mode="after")
+    def _firewall_rules_reference_valid_zones(self) -> "Topology":
+        zone_names = {n.name for n in self.networks}
+        for rule in self.firewall_rules:
+            for attr in ("from_zone", "to_zone"):
+                zone = getattr(rule, attr)
+                if zone not in zone_names:
+                    raise ValueError(
+                        f"Firewall rule references zone '{zone}' "
+                        f"which is not defined in networks: {sorted(zone_names)}"
+                    )
+        return self
+# ---------------------------------------------------------------------------
+# Difficulty envelope
+# ---------------------------------------------------------------------------
+class Difficulty(BaseModel):
+    """Difficulty constraints the Validator enforces on generated ranges."""
+    max_steps: int = Field(
+        ..., gt=0, description="Maximum golden-path steps allowed"
+    )
+    min_vulns: int = Field(
+        default=1, ge=1, description="Minimum planted vulnerabilities"
+    )
+    max_vulns: int = Field(
+        default=3, ge=1, description="Maximum planted vulnerabilities"
+    )
+    @model_validator(mode="after")
+    def _min_le_max(self) -> "Difficulty":
+        if self.min_vulns > self.max_vulns:
+            raise ValueError(
+                f"min_vulns ({self.min_vulns}) must be <= max_vulns ({self.max_vulns})"
+            )
+        return self
+# ---------------------------------------------------------------------------
+# Top-level manifest
+# ---------------------------------------------------------------------------
+class Manifest(BaseModel):
+    """Top-level range manifest -- the contract between humans and the Builder.
+    Required fields define the network and vuln envelope. Optional fields
+    (company, users, NPCs, data, processes) provide narrative context that
+    lets the Builder generate realistic, interconnected scenarios.
+    """
+    name: str = Field(..., description="Human-readable range name")
+    tier: int = Field(..., ge=1, le=5, description="Complexity tier (1-5)")
+    # Company context (optional but strongly encouraged)
+    company: Company | None = None
+    departments: list[Department] = Field(default_factory=list)
+    users: list[User] = Field(default_factory=list)
+    npc_personas: list[NPCProfile] = Field(default_factory=list)
+    data_inventory: list[DataAsset] = Field(default_factory=list)
+    business_processes: list[BusinessProcess] = Field(default_factory=list)
+    # Infrastructure realism (optional -- enriches Builder context)
+    tech_stack: TechStack | Any | None = None
+    credential_policy: CredentialPolicy | Any | None = None
+    monitoring_coverage: MonitoringCoverage | Any | None = None
+    trust_relationships: list[TrustRelationship] = Field(default_factory=list)
+    operational_context: OperationalContext | Any | None = None
+    # Core topology and vuln envelope
+    topology: Topology
+    bug_families: list[str] = Field(
+        ...,
+        min_length=1,
+        description="Vulnerability classes the Builder may plant (LLM generates details from these type names)",
+    )
+    task_families: list[str] = Field(
+        default=["exploit", "investigate", "patch", "report"],
+        description="Task types agents may be asked to perform",
+    )
+    difficulty: Difficulty
+# ---------------------------------------------------------------------------
+# Loader
+# ---------------------------------------------------------------------------
+def load_manifest(path: str | Path) -> Manifest:
+    """Load a YAML manifest file and return a validated ``Manifest``.
+    Args:
+        path: Location of the YAML manifest on disk.
+    Returns:
+        The parsed and validated ``Manifest``.
+    Raises:
+        FileNotFoundError: If ``path`` does not exist.
+        pydantic.ValidationError: If the content is not a valid manifest,
+            including an empty file or a non-mapping top-level document.
+    """
+    path = Path(path)
+    if not path.exists():
+        raise FileNotFoundError(f"Manifest not found: {path}")
+    # Pin the encoding so parsing does not depend on the host locale.
+    with path.open(encoding="utf-8") as fh:
+        raw = yaml.safe_load(fh)
+    # ``Manifest(**raw)`` raises a bare TypeError when the document is empty
+    # (``None``) or not a mapping; ``model_validate`` reports those cases as
+    # the documented ``ValidationError`` instead.
+    return Manifest.model_validate(raw)

src/open_range/server/environment.py CHANGED Viewed

@@ -181,7 +181,9 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
             elif self._get_docker() is not None:
                 self._execution_mode = "docker"
             else:
-                self._execution_mode = "subprocess"
     # -----------------------------------------------------------------
     # Docker helpers
@@ -926,6 +928,20 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
         snapshot.compose = compose
         return True
     def _refresh_npc_traffic_log(self) -> None:
         """Pull latest NPC activity from the manager into the traffic log."""
         if self._npc_manager is not None:
@@ -1433,6 +1449,7 @@ class RangeEnvironment(Environment[RangeAction, RangeObservation, RangeState]):
         # Runtime-backed episodes boot a fresh project per reset. Manual/mock
         # snapshots still use direct artifact application.
         activated = self._activate_runtime_snapshot(self._snapshot, episode_id=eid)
         # Start services BEFORE applying snapshot data so that daemons
         # (MySQL, slapd, etc.) are ready to receive SQL / LDIF payloads.

             elif self._get_docker() is not None:
                 self._execution_mode = "docker"
             else:
+                # Missing Docker must not silently change the environment
+                # semantics to host-shell execution. Keep "docker" mode, which
+                # falls back to mock execution when no Docker client exists.
+                self._execution_mode = "docker"
     # -----------------------------------------------------------------
     # Docker helpers
         snapshot.compose = compose
         return True
+    def _ensure_clean_reset_path(self, *, activated: bool) -> None:
+        """Fail closed when a reset would reuse live Docker containers.
+        Returns without error when a runtime snapshot was activated, when the
+        execution mode is not ``"docker"``, when ``_docker_available`` is
+        ``False``, or when ``_get_docker()`` returns ``None``.
+        Args:
+            activated: Whether the caller already activated a runtime snapshot
+                for this reset.
+        Raises:
+            RuntimeError: If none of the conditions above hold, i.e. the reset
+                would be applied directly in docker mode with a Docker client.
+        """
+        direct_docker_reset = not activated and self._execution_mode == "docker"
+        if not direct_docker_reset:
+            return
+        # The explicit flag is checked first so the client lookup is skipped
+        # when Docker is already known to be unavailable.
+        if self._docker_available is False or self._get_docker() is None:
+            return
+        message = (
+            "Direct docker snapshot reset is disabled because it overlays mutable "
+            "container state across episodes. Use ManagedSnapshotRuntime or "
+            "explicitly opt into execution_mode='subprocess'."
+        )
+        raise RuntimeError(message)
     def _refresh_npc_traffic_log(self) -> None:
         """Pull latest NPC activity from the manager into the traffic log."""
         if self._npc_manager is not None:
         # Runtime-backed episodes boot a fresh project per reset. Manual/mock
         # snapshots still use direct artifact application.
         activated = self._activate_runtime_snapshot(self._snapshot, episode_id=eid)
+        self._ensure_clean_reset_path(activated=activated)
         # Start services BEFORE applying snapshot data so that daemons
         # (MySQL, slapd, etc.) are ready to receive SQL / LDIF payloads.

tests/test_environment.py CHANGED Viewed

@@ -1,5 +1,7 @@
 """Tests for RangeEnvironment lifecycle — all run without Docker."""
 import pytest
 from open_range.protocols import (
@@ -72,6 +74,21 @@ class TestReset:
         # In mock mode service health is unknown, but hosts should be tracked.
         assert set(env.state.services_status.keys()) == {"attacker", "siem"}
 class TestTargetResolution:
     """Target selection should honor manifest-compiled metadata."""

 """Tests for RangeEnvironment lifecycle — all run without Docker."""
+from unittest.mock import patch
 import pytest
 from open_range.protocols import (
         # In mock mode service health is unknown, but hosts should be tracked.
         assert set(env.state.services_status.keys()) == {"attacker", "siem"}
+    def test_auto_without_docker_uses_mock_docker_mode(self):
+        """``auto`` mode without Docker keeps the ``docker`` label and runs mock commands."""
+        # Stand-in for ``_get_docker``: records Docker as unavailable and
+        # returns no client.
+        def fake_get_docker(self):
+            self._docker_available = False
+            return None
+        # The patch covers only construction; reset() and step() run after it
+        # has been lifted.
+        with patch.object(RangeEnvironment, "_get_docker", fake_get_docker):
+            env = RangeEnvironment(docker_available=None, execution_mode="auto")
+        env.reset(snapshot=_MINIMAL_SNAPSHOT)
+        obs = env.step(RangeAction(command="printf test", mode="red"))
+        # The mode stays "docker" (never host subprocess) and the command is
+        # answered by the mock path with no error output.
+        assert env._execution_mode == "docker"
+        assert obs.stderr == ""
+        assert "[mock] executed on attacker" in obs.stdout
 class TestTargetResolution:
     """Target selection should honor manifest-compiled metadata."""

tests/test_lint.py CHANGED Viewed

@@ -7,12 +7,11 @@ from pathlib import Path
 import shutil
 import subprocess
 import sys
-import tomllib
 import pytest
 import yaml
-from manifests.schema import Manifest, load_manifest
 from open_range.lint import lint_file, lint_manifest
@@ -291,15 +290,12 @@ class TestLintFile:
 class TestPackagingAndInvocation:
-    def test_pyproject_includes_manifests_package(self):
-        pyproject = tomllib.loads((ROOT / "pyproject.toml").read_text())
-        packages = pyproject["tool"]["setuptools"]["packages"]
-        assert "manifests" in packages
     def test_lint_module_runs_outside_repo_with_packaged_layout(self, tmp_path):
         site_root = tmp_path / "site"
         shutil.copytree(ROOT / "src" / "open_range", site_root / "open_range")
-        shutil.copytree(ROOT / "src" / "manifests", site_root / "manifests")
         outside = tmp_path / "outside"
         outside.mkdir()

 import shutil
 import subprocess
 import sys
 import pytest
 import yaml
+from open_range.manifest_schema import Manifest, load_manifest
 from open_range.lint import lint_file, lint_manifest
 class TestPackagingAndInvocation:
+    def test_open_range_package_contains_manifest_schema(self):
+        """The manifest schema module must exist inside the ``open_range`` source tree."""
+        assert (ROOT / "src" / "open_range" / "manifest_schema.py").exists()
     def test_lint_module_runs_outside_repo_with_packaged_layout(self, tmp_path):
         site_root = tmp_path / "site"
         shutil.copytree(ROOT / "src" / "open_range", site_root / "open_range")
         outside = tmp_path / "outside"
         outside.mkdir()

tests/test_runtime.py CHANGED Viewed

@@ -483,6 +483,22 @@ class TestManagedSnapshotRuntime:
 class TestEnvironmentRuntimeIntegration:
     def test_reset_uses_managed_runtime_snapshot(self, tier1_manifest, tmp_path):
         runtime = ManagedSnapshotRuntime(
             manifest=tier1_manifest,

 class TestEnvironmentRuntimeIntegration:
+    def test_reset_rejects_direct_live_docker_overlay(self):
+        """``reset()`` must raise when docker mode has a client but no managed runtime."""
+        # Minimal three-host snapshot with one file payload destined for "web".
+        snapshot = SnapshotSpec(
+            topology={"hosts": ["attacker", "siem", "web"]},
+            compose={"services": {"attacker": {}, "siem": {}, "web": {}}},
+            files={"web:/var/www/html/index.php": "<?php echo 'hi'; ?>"},
+            task={"red_briefing": "Go.", "blue_briefing": "Watch."},
+        )
+        env = RangeEnvironment(docker_available=True, execution_mode="docker")
+        # Stub the client lookup on this instance with a non-None placeholder
+        # so the environment sees a Docker client without a real daemon.
+        env._get_docker = lambda: object()  # type: ignore[method-assign]
+        try:
+            with pytest.raises(RuntimeError, match="Direct docker snapshot reset is disabled"):
+                env.reset(snapshot=snapshot)
+        finally:
+            # Always release the environment, even if the assertion fails.
+            env.close()
     def test_reset_uses_managed_runtime_snapshot(self, tier1_manifest, tmp_path):
         runtime = ManagedSnapshotRuntime(
             manifest=tier1_manifest,