yashppawar commited on
Commit
6f6baad
·
verified ·
1 Parent(s): 401c6f8

Upload folder using huggingface_hub

Browse files
agents/llm_policy.py CHANGED
@@ -47,6 +47,7 @@ SYSTEM_PROMPT = textwrap.dedent(
47
  {"action_type": "read_file", "path": "/some/file", "max_bytes": 2048}
48
  {"action_type": "grep", "pattern": "substring", "path": "/some/file"}
49
  {"action_type": "stat", "path": "/some/file"}
 
50
  {"action_type": "submit_report","report": {
51
  "compromised_user": "alice",
52
  "initial_ip": "198.51.100.77",
@@ -63,7 +64,8 @@ SYSTEM_PROMPT = textwrap.dedent(
63
 
64
  Rules:
65
  - Output EXACTLY ONE JSON object. No commentary, no markdown.
66
- - Start with list_dir on /var/log and /home to orient yourself.
 
67
  - Read /var/log/auth.log to find the compromised user and source IP.
68
  - For medium/hard tasks, also find modified files and use 'stat' to
69
  compute the backdoor SHA256 (the stat action returns sha256).
@@ -150,6 +152,8 @@ def action_to_str(action: ForensicShellAction) -> str:
150
  return f"grep({action.pattern!r},{action.path!r})"
151
  if action.action_type == "stat":
152
  return f"stat({action.path!r})"
 
 
153
  if action.action_type == "submit_report":
154
  return "submit_report(...)"
155
  return action.action_type
 
47
  {"action_type": "read_file", "path": "/some/file", "max_bytes": 2048}
48
  {"action_type": "grep", "pattern": "substring", "path": "/some/file"}
49
  {"action_type": "stat", "path": "/some/file"}
50
+ {"action_type": "find", "pattern": "*.log", "path": "/var"}
51
  {"action_type": "submit_report","report": {
52
  "compromised_user": "alice",
53
  "initial_ip": "198.51.100.77",
 
64
 
65
  Rules:
66
  - Output EXACTLY ONE JSON object. No commentary, no markdown.
67
+ - Start with list_dir on /var/log and /home, or find('*', '/') to orient.
68
+ - Use find('*.sh', '/') to discover attacker scripts recursively.
69
  - Read /var/log/auth.log to find the compromised user and source IP.
70
  - For medium/hard tasks, also find modified files and use 'stat' to
71
  compute the backdoor SHA256 (the stat action returns sha256).
 
152
  return f"grep({action.pattern!r},{action.path!r})"
153
  if action.action_type == "stat":
154
  return f"stat({action.path!r})"
155
+ if action.action_type == "find":
156
+ return f"find({action.pattern!r},{action.path!r})"
157
  if action.action_type == "submit_report":
158
  return "submit_report(...)"
159
  return action.action_type
models.py CHANGED
@@ -47,7 +47,7 @@ class ForensicShellAction(Action):
47
  """Agent action. Use action_type to pick the verb; set only the fields that verb needs."""
48
 
49
  action_type: Literal[
50
- "list_dir", "read_file", "grep", "stat", "submit_report"
51
  ] = Field(..., description="Which verb to execute")
52
  path: Optional[str] = Field(
53
  default=None, description="Target path for list_dir / read_file / grep / stat"
 
47
  """Agent action. Use action_type to pick the verb; set only the fields that verb needs."""
48
 
49
  action_type: Literal[
50
+ "list_dir", "read_file", "grep", "stat", "find", "submit_report"
51
  ] = Field(..., description="Which verb to execute")
52
  path: Optional[str] = Field(
53
  default=None, description="Target path for list_dir / read_file / grep / stat"
openenv_forensic_shell.egg-info/SOURCES.txt CHANGED
@@ -1,6 +1,7 @@
1
  README.md
2
  __init__.py
3
  client.py
 
4
  models.py
5
  pyproject.toml
6
  ./__init__.py
 
1
  README.md
2
  __init__.py
3
  client.py
4
+ inference.py
5
  models.py
6
  pyproject.toml
7
  ./__init__.py
server/forensic_shell_environment.py CHANGED
@@ -33,7 +33,13 @@ except ImportError:
33
  from scenarios import DEFAULT_TASK_ID, SCENARIOS # type: ignore
34
 
35
 
36
- MAX_STEPS_PER_EPISODE = 30
 
 
 
 
 
 
37
 
38
  # Exploration shaping reward — small positive reward the first time the agent
39
  # reads one of the scenario's "canonical forensic artifacts" (auth.log, bash
@@ -147,23 +153,31 @@ class ForensicShellEnvironment(Environment):
147
  self._canonical = _canonical_artifacts(self._scenario)
148
  self._state = State(episode_id=str(uuid4()), step_count=0)
149
 
 
 
 
 
 
 
 
 
150
  return ForensicShellObservation(
151
  output=(
152
  f"ForensicShell ready. Task: {self._task_id} "
153
- f"({self._scenario.get('difficulty', 'unknown')}).\n"
154
  f"Available actions: list_dir(path), read_file(path,max_bytes), "
155
- f"grep(pattern,path), stat(path), submit_report(report).\n"
156
  f"Start by listing /var/log or /home."
157
  ),
158
  task_id=self._task_id,
159
  task_description=self._scenario["description"],
160
- steps_remaining=MAX_STEPS_PER_EPISODE,
161
  action_error=None,
162
  done=False,
163
  reward=0.0,
164
  metadata={
165
- "difficulty": self._scenario.get("difficulty", ""),
166
- "max_steps": MAX_STEPS_PER_EPISODE,
167
  },
168
  )
169
 
@@ -172,7 +186,7 @@ class ForensicShellEnvironment(Environment):
172
  def step(self, action: ForensicShellAction) -> ForensicShellObservation: # type: ignore[override]
173
  self._state.step_count += 1
174
  self._steps_used += 1
175
- steps_remaining = max(0, MAX_STEPS_PER_EPISODE - self._steps_used)
176
 
177
  # If already done, return a terminal obs (grace)
178
  if self._done:
@@ -185,7 +199,7 @@ class ForensicShellEnvironment(Environment):
185
  )
186
 
187
  # Hard cap on steps
188
- if self._steps_used > MAX_STEPS_PER_EPISODE:
189
  self._done = True
190
  return self._obs(
191
  output="Step budget exhausted without a submitted report.",
@@ -218,6 +232,10 @@ class ForensicShellEnvironment(Environment):
218
  out, err = self._do_stat(action.path or "")
219
  return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=0.0)
220
 
 
 
 
 
221
  if verb == "submit_report":
222
  return self._do_submit_report(action, steps_remaining)
223
 
@@ -238,6 +256,26 @@ class ForensicShellEnvironment(Environment):
238
  reward=0.0,
239
  )
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  # ---- shaping reward -----------------------------------------------------
242
 
243
  def _award_shaping(self, path: str) -> float:
 
33
  from scenarios import DEFAULT_TASK_ID, SCENARIOS # type: ignore
34
 
35
 
36
+ MAX_STEPS_PER_EPISODE = 30 # default fallback
37
+
38
+ # Difficulty-dependent step budgets. Easier tasks shouldn't reward aimless
39
+ # exploration; harder tasks with red herrings genuinely need more budget.
40
+ STEPS_BY_DIFFICULTY = {"easy": 15, "medium": 25, "hard": 35}
41
+ # Hand-authored task overrides (kept for backward compat with Day-1 baselines)
42
+ STEPS_BY_TASK = {"t1_login": 15, "t2_modified": 25, "t3_timeline": 35}
43
 
44
  # Exploration shaping reward — small positive reward the first time the agent
45
  # reads one of the scenario's "canonical forensic artifacts" (auth.log, bash
 
153
  self._canonical = _canonical_artifacts(self._scenario)
154
  self._state = State(episode_id=str(uuid4()), step_count=0)
155
 
156
+ # Difficulty-dependent step budget
157
+ diff_label = self._scenario.get("difficulty", "medium")
158
+ self._max_steps = (
159
+ STEPS_BY_TASK.get(self._task_id)
160
+ or STEPS_BY_DIFFICULTY.get(diff_label)
161
+ or MAX_STEPS_PER_EPISODE
162
+ )
163
+
164
  return ForensicShellObservation(
165
  output=(
166
  f"ForensicShell ready. Task: {self._task_id} "
167
+ f"({diff_label}).\n"
168
  f"Available actions: list_dir(path), read_file(path,max_bytes), "
169
+ f"grep(pattern,path), stat(path), find(pattern,path), submit_report(report).\n"
170
  f"Start by listing /var/log or /home."
171
  ),
172
  task_id=self._task_id,
173
  task_description=self._scenario["description"],
174
+ steps_remaining=self._max_steps,
175
  action_error=None,
176
  done=False,
177
  reward=0.0,
178
  metadata={
179
+ "difficulty": diff_label,
180
+ "max_steps": self._max_steps,
181
  },
182
  )
183
 
 
186
  def step(self, action: ForensicShellAction) -> ForensicShellObservation: # type: ignore[override]
187
  self._state.step_count += 1
188
  self._steps_used += 1
189
+ steps_remaining = max(0, self._max_steps - self._steps_used)
190
 
191
  # If already done, return a terminal obs (grace)
192
  if self._done:
 
199
  )
200
 
201
  # Hard cap on steps
202
+ if self._steps_used > self._max_steps:
203
  self._done = True
204
  return self._obs(
205
  output="Step budget exhausted without a submitted report.",
 
232
  out, err = self._do_stat(action.path or "")
233
  return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=0.0)
234
 
235
+ if verb == "find":
236
+ out, err = self._do_find(action.pattern or "*", action.path or "/")
237
+ return self._obs(output=out, steps_remaining=steps_remaining, error=err, done=False, reward=0.0)
238
+
239
  if verb == "submit_report":
240
  return self._do_submit_report(action, steps_remaining)
241
 
 
256
  reward=0.0,
257
  )
258
 
259
+ def _do_find(self, pattern: str, path: str) -> Tuple[str, Optional[str]]:
260
+ """Recursive search: find files matching a glob pattern under a directory."""
261
+ from fnmatch import fnmatch
262
+
263
+ path = path.rstrip("/") or "/"
264
+ prefix = "/" if path == "/" else path + "/"
265
+ if path == "/":
266
+ prefix = "/"
267
+ matches: List[str] = []
268
+ for fp in sorted(self._fs.keys()):
269
+ if fp == path or fp.startswith(prefix):
270
+ basename = fp.rsplit("/", 1)[-1] if "/" in fp else fp
271
+ if fnmatch(basename, pattern):
272
+ matches.append(fp)
273
+ if len(matches) >= 50:
274
+ break
275
+ if not matches:
276
+ return f"(no files matching {pattern!r} under {path})", None
277
+ return "\n".join(matches), None
278
+
279
  # ---- shaping reward -----------------------------------------------------
280
 
281
  def _award_shaping(self, path: str) -> float:
server/grader.py CHANGED
@@ -4,6 +4,15 @@ Deterministic graders for ForensicShell tasks.
4
  Each grader takes a submitted ForensicReport (as dict) and the scenario ground-truth
5
  dict and returns a float in [0.0, 1.0]. Partial credit is awarded per correct subfield
6
  so the reward function has meaningful gradient, not just 0/1.
 
 
 
 
 
 
 
 
 
7
  """
8
 
9
  from typing import Dict, List
@@ -13,14 +22,25 @@ def _safe_str(x) -> str:
13
  return (x or "").strip().lower() if isinstance(x, str) else ""
14
 
15
 
16
- def _jaccard(a: List[str], b: List[str]) -> float:
17
- sa = {s.strip() for s in a if isinstance(s, str) and s.strip()}
18
- sb = {s.strip() for s in b if isinstance(s, str) and s.strip()}
19
- if not sa and not sb:
 
 
 
 
 
20
  return 1.0
21
- if not sa or not sb:
22
  return 0.0
23
- return len(sa & sb) / len(sa | sb)
 
 
 
 
 
 
24
 
25
 
26
  def _kendall_tau_normalized(pred_order: List[str], true_order: List[str]) -> float:
@@ -69,7 +89,8 @@ def _grade_t1_login(report: Dict, truth: Dict) -> float:
69
  def _grade_t2_modified(report: Dict, truth: Dict) -> float:
70
  user_ok = 1.0 if _safe_str(report.get("compromised_user")) == _safe_str(truth.get("compromised_user")) else 0.0
71
  ip_ok = 1.0 if _safe_str(report.get("initial_ip")) == _safe_str(truth.get("initial_ip")) else 0.0
72
- files_score = _jaccard(report.get("modified_files") or [], truth.get("modified_files") or [])
 
73
  sha_ok = 1.0 if _safe_str(report.get("backdoor_sha256")) == _safe_str(truth.get("backdoor_sha256")) else 0.0
74
  return 0.2 * user_ok + 0.2 * ip_ok + 0.3 * files_score + 0.3 * sha_ok
75
 
@@ -77,7 +98,8 @@ def _grade_t2_modified(report: Dict, truth: Dict) -> float:
77
  def _grade_t3_timeline(report: Dict, truth: Dict) -> float:
78
  user_ok = 1.0 if _safe_str(report.get("compromised_user")) == _safe_str(truth.get("compromised_user")) else 0.0
79
  ip_ok = 1.0 if _safe_str(report.get("initial_ip")) == _safe_str(truth.get("initial_ip")) else 0.0
80
- files_score = _jaccard(report.get("modified_files") or [], truth.get("modified_files") or [])
 
81
  sha_ok = 1.0 if _safe_str(report.get("backdoor_sha256")) == _safe_str(truth.get("backdoor_sha256")) else 0.0
82
 
83
  pred_timeline = report.get("timeline") or []
@@ -89,7 +111,7 @@ def _grade_t3_timeline(report: Dict, truth: Dict) -> float:
89
  pred_phases = [p for p in pred_phases if isinstance(p, str)]
90
  true_phases = [e["phase"] for e in true_timeline]
91
 
92
- # F1 over phase set
93
  pred_set = set(pred_phases)
94
  true_set = set(true_phases)
95
  if not pred_set and not true_set:
@@ -102,16 +124,20 @@ def _grade_t3_timeline(report: Dict, truth: Dict) -> float:
102
  recall = tp / len(true_set)
103
  phase_f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
104
 
105
- # Ordering quality (only if there's overlap to order)
106
  order_score = _kendall_tau_normalized(pred_phases, true_phases)
107
 
 
 
 
 
 
108
  return (
109
  0.15 * user_ok
110
  + 0.15 * ip_ok
111
  + 0.15 * files_score
112
  + 0.15 * sha_ok
113
- + 0.20 * phase_f1
114
- + 0.20 * order_score
115
  )
116
 
117
 
@@ -125,9 +151,7 @@ GRADERS = {
125
  def _grade_generic(report: Dict, truth: Dict) -> float:
126
  """
127
  Dispatcher for procedurally generated scenarios. Picks the right sub-grader
128
- by inspecting which fields are present in the ground-truth dict. This keeps
129
- the grader agnostic to task_id naming and lets the generator add richer
130
- fields without touching this module.
131
  """
132
  if "timeline" in truth:
133
  return _grade_t3_timeline(report, truth)
 
4
  Each grader takes a submitted ForensicReport (as dict) and the scenario ground-truth
5
  dict and returns a float in [0.0, 1.0]. Partial credit is awarded per correct subfield
6
  so the reward function has meaningful gradient, not just 0/1.
7
+
8
+ Design choices:
9
+ - modified_files uses F0.5 (precision-weighted) instead of Jaccard: submitting
10
+ false-positive files (claiming an unmodified file was attacked) is penalized
11
+ more than missing a file. This mirrors real forensics where false positives
12
+ waste incident response effort.
13
+ - Timeline scoring is multiplicative (phase_F1 * ordering): having all 5 phases
14
+ in the wrong order scores 0, not ~0.30. Correct phases AND correct order
15
+ required for full credit.
16
  """
17
 
18
  from typing import Dict, List
 
22
  return (x or "").strip().lower() if isinstance(x, str) else ""
23
 
24
 
25
+ def _fbeta(pred: List[str], truth: List[str], beta: float = 0.5) -> float:
26
+ """
27
+ F-beta score over string sets. beta < 1 weighs precision more than recall.
28
+ F0.5 penalizes false positives (extra wrong files) 2x harder than false
29
+ negatives (missing files), matching real forensic triage priorities.
30
+ """
31
+ pred_set = {s.strip() for s in pred if isinstance(s, str) and s.strip()}
32
+ truth_set = {s.strip() for s in truth if isinstance(s, str) and s.strip()}
33
+ if not pred_set and not truth_set:
34
  return 1.0
35
+ if not pred_set or not truth_set:
36
  return 0.0
37
+ tp = len(pred_set & truth_set)
38
+ precision = tp / len(pred_set)
39
+ recall = tp / len(truth_set)
40
+ if precision + recall == 0:
41
+ return 0.0
42
+ beta2 = beta * beta
43
+ return (1 + beta2) * precision * recall / (beta2 * precision + recall)
44
 
45
 
46
  def _kendall_tau_normalized(pred_order: List[str], true_order: List[str]) -> float:
 
89
def _grade_t2_modified(report: Dict, truth: Dict) -> float:
    """Grade a t2-style report: compromised user, source IP, modified files, backdoor hash.

    Returns a partial-credit score in [0.0, 1.0].
    """
    def exact(field: str) -> float:
        # Case/whitespace-insensitive string equality via _safe_str.
        return 1.0 if _safe_str(report.get(field)) == _safe_str(truth.get(field)) else 0.0

    user_ok = exact("compromised_user")
    ip_ok = exact("initial_ip")
    sha_ok = exact("backdoor_sha256")
    # F0.5: precision-weighted — false positives penalized harder than false negatives
    files_score = _fbeta(report.get("modified_files") or [], truth.get("modified_files") or [], beta=0.5)
    return 0.2 * user_ok + 0.2 * ip_ok + 0.3 * files_score + 0.3 * sha_ok
96
 
 
98
  def _grade_t3_timeline(report: Dict, truth: Dict) -> float:
99
  user_ok = 1.0 if _safe_str(report.get("compromised_user")) == _safe_str(truth.get("compromised_user")) else 0.0
100
  ip_ok = 1.0 if _safe_str(report.get("initial_ip")) == _safe_str(truth.get("initial_ip")) else 0.0
101
+ # F0.5 for files (same precision-weighting as t2)
102
+ files_score = _fbeta(report.get("modified_files") or [], truth.get("modified_files") or [], beta=0.5)
103
  sha_ok = 1.0 if _safe_str(report.get("backdoor_sha256")) == _safe_str(truth.get("backdoor_sha256")) else 0.0
104
 
105
  pred_timeline = report.get("timeline") or []
 
111
  pred_phases = [p for p in pred_phases if isinstance(p, str)]
112
  true_phases = [e["phase"] for e in true_timeline]
113
 
114
+ # F1 over phase set (standard F1 — we don't precision-weight phases)
115
  pred_set = set(pred_phases)
116
  true_set = set(true_phases)
117
  if not pred_set and not true_set:
 
124
  recall = tp / len(true_set)
125
  phase_f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
126
 
127
+ # Ordering quality
128
  order_score = _kendall_tau_normalized(pred_phases, true_phases)
129
 
130
+ # MULTIPLICATIVE timeline scoring: having all phases in wrong order gives
131
+ # F1=1.0 * tau=0.0 = 0.0, not the ~0.30 an additive scheme would produce.
132
+ # Correct phases AND correct order both required for full timeline credit.
133
+ timeline_score = phase_f1 * order_score
134
+
135
  return (
136
  0.15 * user_ok
137
  + 0.15 * ip_ok
138
  + 0.15 * files_score
139
  + 0.15 * sha_ok
140
+ + 0.40 * timeline_score
 
141
  )
142
 
143
 
 
151
  def _grade_generic(report: Dict, truth: Dict) -> float:
152
  """
153
  Dispatcher for procedurally generated scenarios. Picks the right sub-grader
154
+ by inspecting which fields are present in the ground-truth dict.
 
 
155
  """
156
  if "timeline" in truth:
157
  return _grade_t3_timeline(report, truth)