File size: 2,431 Bytes
8c486a8
 
 
 
 
 
 
 
 
 
 
49d1c75
8c486a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49d1c75
8c486a8
 
 
49d1c75
 
8c486a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""RangeAgent protocol and EpisodeResult dataclass.

Follows the same structural subtyping pattern as SnapshotBuilder,
NPCBehavior, and ValidatorCheck in ``open_range.protocols``.  Any object
with matching ``reset`` and ``act`` methods satisfies the protocol --
no base class required.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Literal, Protocol, runtime_checkable


@runtime_checkable
class RangeAgent(Protocol):
    """Structural interface for an agent playing Red or Blue in OpenRange.

    Conformance is by shape rather than by inheritance: any object that
    exposes ``reset`` and ``act`` methods compatible with the signatures
    below satisfies the protocol.  The class is ``runtime_checkable``, so
    ``isinstance(obj, RangeAgent)`` works, but that check only confirms the
    two methods are present -- it does not validate their signatures.
    """

    def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
        """Prepare the agent for the start of a fresh episode.

        Args:
            briefing: Task description taken from the snapshot, e.g.
                Red: "Target network with web services..." or
                Blue: "You are SOC analyst for AcmeCorp...".
            role: The side this agent is playing, ``"red"`` or ``"blue"``.
        """
        ...

    def act(self, observation: Any) -> str:
        """Choose the next command in response to an observation.

        Args:
            observation: The previous observation object; simpler agents
                may be handed a plain string instead.

        Returns:
            The shell command to run next, as a string
            (e.g., ``"nmap -sV 10.0.1.0/24"``).
        """
        ...


# ---------------------------------------------------------------------------
# Episode metrics & result
# ---------------------------------------------------------------------------


@dataclass
class EpisodeMetrics:
    """Computed metrics for a single episode."""

    flag_capture_rate: float = 0.0
    steps_to_first_flag: int | None = None
    stealth: float = 0.0
    detection_tp: float = 0.0
    false_positives: float = 0.0
    patch_success_rate: float = 0.0
    availability: float = 1.0


@dataclass
class EpisodeResult:
    """Full result of a single Red + Blue episode.

    All fields have defaults, so ``EpisodeResult()`` is valid.  The list
    fields and ``metrics`` are built with ``default_factory``, so every
    instance gets its own fresh objects rather than sharing one default.
    """

    # Per-side trajectories as lists of dicts; the schema of each entry is
    # not defined in this class.
    red_trajectory: list[dict] = field(default_factory=list)
    blue_trajectory: list[dict] = field(default_factory=list)
    flags_found: list[str] = field(default_factory=list)
    steps: int = 0
    tier: int = 1
    snapshot_id: str = ""
    red_model: str = ""
    blue_model: str = ""
    # The closed set of outcomes is expressed in the type so static checkers
    # can catch typos; nothing is enforced at runtime.  Defaults to
    # "timeout".
    outcome: Literal["red_win", "blue_win", "timeout"] = "timeout"
    metrics: EpisodeMetrics = field(default_factory=EpisodeMetrics)