Spaces:

scaler-hack
/

scaler-openenv

Sleeping

App Files Files Community

Hacktrix-121 commited on Apr 9

Commit

7cd2458

1 Parent(s): 7ee6ca2

error handling

Browse files

Files changed (6) hide show

openenv.yaml +5 -3
pytest_output.txt +0 -0
src/adaptive_alert_triage/server.py +4 -2
tasks/hard.py +6 -9
tasks/medium.py +3 -4
tests/test_tasks.py +17 -12

openenv.yaml CHANGED Viewed

@@ -140,7 +140,7 @@ tasks:
     correlation_probability: 0.10
     success_threshold: 0.70 # correct_actions / total_actions >= 0.70
     grader: "tasks.easy.EasyTaskGrader"
-    grading_formula: "score = correct_actions / total_actions"
   - id: "medium"
     name: "Resource-Constrained Triage"
@@ -158,7 +158,8 @@ tasks:
     grader: "tasks.medium.MediumTaskGrader"
     grading_formula: |
       raw = resolved_score / max_possible_score
-      score = max(0, raw - fp_penalty(0.30) - critical_miss_penalty(0.20))
   - id: "hard"
     name: "Cascading Failure Prevention"
@@ -178,7 +179,8 @@ tasks:
     grading_formula: |
       chain_score = Σ stop_reward(position) × severity_weight
       stability   = {0 failures: 1.0, 1: 0.80, 2: 0.60, 3: 0.30, 4+: 0.00}
-      score       = min(chain_score / max_possible * stability, 1.0)
 # ── Evaluation metrics (produced by graders) ──────────────────────────────────
 metrics:

     correlation_probability: 0.10
     success_threshold: 0.70 # correct_actions / total_actions >= 0.70
     grader: "tasks.easy.EasyTaskGrader"
+    grading_formula: "score = max(0.01, min(0.99, (correct_actions / total_actions) * 0.98 + 0.01))"
   - id: "medium"
     name: "Resource-Constrained Triage"
     grader: "tasks.medium.MediumTaskGrader"
     grading_formula: |
       raw = resolved_score / max_possible_score
+      base_score = max(0, raw - fp_penalty(0.30) - critical_miss_penalty(0.20))
+      score = max(0.01, min(0.99, base_score * 0.98 + 0.01))
   - id: "hard"
     name: "Cascading Failure Prevention"
     grading_formula: |
       chain_score = Σ stop_reward(position) × severity_weight
       stability   = {0 failures: 1.0, 1: 0.80, 2: 0.60, 3: 0.30, 4+: 0.00}
+      base_score  = min(chain_score / max_possible * stability, 1.0)
+      score       = max(0.01, min(0.99, base_score * 0.98 + 0.01))
 # ── Evaluation metrics (produced by graders) ──────────────────────────────────
 metrics:

pytest_output.txt ADDED Viewed

Binary file (25.1 kB). View file

src/adaptive_alert_triage/server.py CHANGED Viewed

@@ -130,7 +130,8 @@ def _tick(info: Dict) -> None:
 def _score() -> float:
-    return _step_correct / _step_total if _step_total else 0.0
 # ── PPO helpers ───────────────────────────────────────────────────────────────
@@ -602,7 +603,8 @@ async def ws_train(websocket: WebSocket):
                 obs, reward, done, info = env.step(act)
                 lt += 1
                 if info.get("action_correct", False): lc += 1
-                s = lc / lt if lt else 0.0
                 if done: episode_scores.append(s)
                 info["task_score"] = s
                 await websocket.send_json({

 def _score() -> float:
+    raw = _step_correct / _step_total if _step_total else 0.0
+    return max(0.01, min(round(0.01 + 0.98 * raw, 2), 0.99))
 # ── PPO helpers ───────────────────────────────────────────────────────────────
                 obs, reward, done, info = env.step(act)
                 lt += 1
                 if info.get("action_correct", False): lc += 1
+                raw_s = lc / lt if lt else 0.0
+                s = max(0.01, min(round(0.01 + 0.98 * raw_s, 2), 0.99))
                 if done: episode_scores.append(s)
                 info["task_score"] = s
                 await websocket.send_json({

tasks/hard.py CHANGED Viewed

@@ -411,17 +411,16 @@ class HardTaskGrader:
         """
         Fraction of chains that were successfully stopped (any position).
-        Returns 0.99 when no chains exist (nothing to detect).
         """
         if not self._chains:
-            return 0.99
         stopped = sum(
             1 for c in self._chains.values()
             if c.completed and not c.hit_failure
         )
         raw = stopped / len(self._chains)
-        # Clamp to (0, 1)
-        return max(0.01, min(raw, 0.99))
     def calculate_stability_score(self) -> float:
         """Return the stability multiplier for the current failure count."""
@@ -570,13 +569,11 @@ class HardTaskGrader:
     @staticmethod
     def _stability_score(failures: int) -> float:
-        """Step-function stability multiplier clamped to (0, 1)."""
         for threshold, score in _STABILITY_BY_FAILURES:
             if failures <= threshold:
-                # Clamp stability scores to strict (0, 1)
-                return max(0.01, min(score, 0.99))
-        # Return floor clamped to (0, 1)
-        return max(0.01, min(_STABILITY_FLOOR, 0.99))
 # ---------------------------------------------------------------------------

         """
         Fraction of chains that were successfully stopped (any position).
+        Returns 1.0 when no chains exist (nothing to detect).
         """
         if not self._chains:
+            return 1.0
         stopped = sum(
             1 for c in self._chains.values()
             if c.completed and not c.hit_failure
         )
         raw = stopped / len(self._chains)
+        return raw
     def calculate_stability_score(self) -> float:
         """Return the stability multiplier for the current failure count."""
     @staticmethod
     def _stability_score(failures: int) -> float:
+        """Step-function stability multiplier."""
         for threshold, score in _STABILITY_BY_FAILURES:
             if failures <= threshold:
+                return score
+        return _STABILITY_FLOOR
 # ---------------------------------------------------------------------------

tasks/medium.py CHANGED Viewed

@@ -236,16 +236,15 @@ class MediumTaskGrader:
         Fraction of INVESTIGATE + ESCALATE actions that were productive.
         Productive = action on an alert with true_severity ≥ 0.50.
-        Returns 0.99 when no costly actions were taken (or 0.99 for perfect efficiency).
         """
         costly = [h for h in self._action_history
                   if h["action"] in ("INVESTIGATE", "ESCALATE")]
         if not costly:
-            return 0.99
         productive = sum(1 for h in costly if h["true_severity"] >= _MEDIUM_LOWER)
         raw = productive / len(costly)
-        # Clamp to (0, 1)
-        return max(0.01, min(raw, 0.99))
     # ------------------------------------------------------------------
     # Metrics

         Fraction of INVESTIGATE + ESCALATE actions that were productive.
         Productive = action on an alert with true_severity ≥ 0.50.
+        Returns 1.0 when no costly actions were taken (or 1.0 for perfect efficiency).
         """
         costly = [h for h in self._action_history
                   if h["action"] in ("INVESTIGATE", "ESCALATE")]
         if not costly:
+            return 1.0
         productive = sum(1 for h in costly if h["true_severity"] >= _MEDIUM_LOWER)
         raw = productive / len(costly)
+        return raw
     # ------------------------------------------------------------------
     # Metrics

tests/test_tasks.py CHANGED Viewed

@@ -226,7 +226,8 @@ class TestHardTaskGrader:
     def test_correlation_detection(self):
         """Test bonus for handling correlated alerts."""
         correlation_chains = [["alert_001", "alert_002", "alert_003"]]
-        grader = HardTaskGrader(correlation_chains=correlation_chains)
         alert = Alert(
             id="alert_001",
@@ -249,7 +250,8 @@ class TestHardTaskGrader:
     def test_failure_prevention_bonus(self):
         """Test bonus for preventing cascading failures."""
         correlation_chains = [["alert_001", "alert_002", "alert_003"]]
-        grader = HardTaskGrader(correlation_chains=correlation_chains)
         # Handle first alert in chain (early detection)
         alert = Alert(
@@ -266,17 +268,17 @@ class TestHardTaskGrader:
         grader.grade_action(action, alert, reward)
-        assert grader.failures_prevented >= 1, "Should register failure prevention"
     def test_system_failure_penalty(self):
         """Test heavy penalty for system failures."""
         grader = HardTaskGrader()
         # Record a failure
-        grader.record_system_failure("alert_001")
-        assert grader.system_failures == 1
-        assert grader.stability_penalty > 0.0
         # Stability score should be reduced
         stability = grader.calculate_stability_score()
@@ -285,7 +287,8 @@ class TestHardTaskGrader:
     def test_missed_correlated_alert_penalty(self):
         """Test extra penalty for missing correlated alerts."""
         correlation_chains = [["alert_001", "alert_002"]]
-        grader = HardTaskGrader(correlation_chains=correlation_chains)
         alert = Alert(
             id="alert_001",
@@ -301,8 +304,8 @@ class TestHardTaskGrader:
         contribution = grader.grade_action(action, alert, reward)
-        # Should have heavy penalty for missing correlated critical
-        assert contribution < -2.0, "Should have extra penalty for correlated miss"
     def test_correlation_detection_rate(self):
         """Test calculation of correlation detection rate."""
@@ -310,10 +313,12 @@ class TestHardTaskGrader:
             ["alert_001", "alert_002"],
             ["alert_003", "alert_004"],
         ]
-        grader = HardTaskGrader(correlation_chains=correlation_chains)
         # Handle one chain
-        grader.chains_handled.add(0)
         rate = grader.calculate_correlation_detection_rate()
         assert abs(rate - 0.5) < 0.01, "Should detect 50% of chains"
@@ -331,7 +336,7 @@ class TestHardTaskGrader:
         # Multiple failures
         for _ in range(3):
-            grader.record_system_failure()
         stability = grader.calculate_stability_score()
         assert stability < 1.0, "Failures should reduce stability"

     def test_correlation_detection(self):
         """Test bonus for handling correlated alerts."""
         correlation_chains = [["alert_001", "alert_002", "alert_003"]]
+        grader = HardTaskGrader()
+        grader.update_correlation_state(correlation_chains)
         alert = Alert(
             id="alert_001",
     def test_failure_prevention_bonus(self):
         """Test bonus for preventing cascading failures."""
         correlation_chains = [["alert_001", "alert_002", "alert_003"]]
+        grader = HardTaskGrader()
+        grader.update_correlation_state(correlation_chains)
         # Handle first alert in chain (early detection)
         alert = Alert(
         grader.grade_action(action, alert, reward)
+        m = grader.get_metrics()
+        assert m["chains_stopped"] >= 1, "Should register failure prevention"
     def test_system_failure_penalty(self):
         """Test heavy penalty for system failures."""
         grader = HardTaskGrader()
         # Record a failure
+        grader.record_failures(1)
+        assert grader._system_failures == 1
         # Stability score should be reduced
         stability = grader.calculate_stability_score()
     def test_missed_correlated_alert_penalty(self):
         """Test extra penalty for missing correlated alerts."""
         correlation_chains = [["alert_001", "alert_002"]]
+        grader = HardTaskGrader()
+        grader.update_correlation_state(correlation_chains)
         alert = Alert(
             id="alert_001",
         contribution = grader.grade_action(action, alert, reward)
+        # Should have negative contribution for missing correlated critical
+        assert contribution < -0.2, f"Should have extra penalty for correlated miss, got {contribution}"
     def test_correlation_detection_rate(self):
         """Test calculation of correlation detection rate."""
             ["alert_001", "alert_002"],
             ["alert_003", "alert_004"],
         ]
+        grader = HardTaskGrader()
+        grader.update_correlation_state(correlation_chains)
         # Handle one chain
+        alert = Alert(id="alert_001", visible_severity=0.8, confidence=0.85, alert_type="CPU", age=1, true_severity=0.85, is_correlated=True)
+        grader.grade_action(Action(alert_id="alert_001", action_type="INVESTIGATE"), alert, Reward(value=0))
         rate = grader.calculate_correlation_detection_rate()
         assert abs(rate - 0.5) < 0.01, "Should detect 50% of chains"
         # Multiple failures
         for _ in range(3):
+            grader.record_failures(1)
         stability = grader.calculate_stability_score()
         assert stability < 1.0, "Failures should reduce stability"