"""RangeAgent protocol plus the EpisodeMetrics and EpisodeResult dataclasses.

Follows the same structural subtyping pattern as SnapshotBuilder,
NPCBehavior, and ValidatorCheck in ``open_range.protocols``. Any object
with matching ``reset`` and ``act`` methods satisfies the protocol --
no base class required.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Literal, Protocol, runtime_checkable
@runtime_checkable
class RangeAgent(Protocol):
    """Structural interface for an agent playing Red or Blue in OpenRange.

    Nothing needs to inherit from this class: an object qualifies simply by
    providing ``reset`` and ``act``.  Because the protocol is decorated with
    ``runtime_checkable``, ``isinstance(obj, RangeAgent)`` is allowed, but
    that runtime check only confirms the two methods exist -- it does not
    compare their signatures.
    """

    def reset(self, briefing: str, role: Literal["red", "blue"]) -> None:
        """Prepare the agent for the start of a new episode.

        Args:
            briefing: Task description taken from the snapshot, e.g.
                Red: "Target network with web services..." or
                Blue: "You are SOC analyst for AcmeCorp...".
            role: The side this agent plays, ``"red"`` or ``"blue"``.
        """
        ...

    def act(self, observation: Any) -> str:
        """Choose the next command in response to an observation.

        Args:
            observation: The previous observation object; simpler agents
                may be handed a plain string instead.

        Returns:
            The shell command to run next, as a string
            (e.g., ``"nmap -sV 10.0.1.0/24"``).
        """
        ...
# ---------------------------------------------------------------------------
# Episode metrics & result
# ---------------------------------------------------------------------------
@dataclass
class EpisodeMetrics:
    """Numeric metrics computed for one episode.

    Every field carries a default, so ``EpisodeMetrics()`` is valid.  Field
    order is part of the positional constructor signature and must not be
    rearranged.
    """

    # NOTE(review): the groupings below are inferred from field names only;
    # this file does not define units or ranges -- confirm with the scorer.

    # Presumably attacker-side (Red) measurements.
    flag_capture_rate: float = 0.0
    steps_to_first_flag: int | None = None  # defaults to None, not 0
    stealth: float = 0.0

    # Presumably defender-side (Blue) measurements.
    detection_tp: float = 0.0
    false_positives: float = 0.0
    patch_success_rate: float = 0.0

    # The only metric whose default is 1.0 rather than 0.0.
    availability: float = 1.0
@dataclass
class EpisodeResult:
    """Full result of a single Red + Blue episode.

    Every field has a default, so ``EpisodeResult()`` is a valid empty
    result whose ``outcome`` is ``"timeout"``.  Mutable fields use
    ``default_factory`` so each instance gets its own list / metrics object.
    """

    # Per-side step records; the dict schema is not defined in this file.
    red_trajectory: list[dict] = field(default_factory=list)
    blue_trajectory: list[dict] = field(default_factory=list)

    flags_found: list[str] = field(default_factory=list)
    steps: int = 0
    tier: int = 1
    snapshot_id: str = ""
    red_model: str = ""
    blue_model: str = ""

    # Closed set of outcomes, expressed with ``Literal`` so type checkers can
    # flag typos.  Dataclasses do not validate annotations at runtime, so
    # existing callers are unaffected.
    outcome: Literal["red_win", "blue_win", "timeout"] = "timeout"

    metrics: EpisodeMetrics = field(default_factory=EpisodeMetrics)
|