Spaces:

scaler-hack
/

scaler-openenv

Sleeping

Tusharp2006 commited on Apr 8

Commit

a9d4552

unverified ·

2 Parent(s): 311abeb 1341fa9

Merge pull request #4 from suraj-gupta-01/phase2

Files changed (4) hide show

src/adaptive_alert_triage/utils.py CHANGED Viewed

@@ -235,6 +235,15 @@ def generate_alert(
     # Observable severity (noisy)
     visible_severity: float = add_observation_noise(true_severity, confidence)
     return Alert(
         id=alert_id,
         visible_severity=visible_severity,
@@ -246,10 +255,12 @@ def generate_alert(
         metadata={
             "false_positive": is_fp,
             "generated_at_step": step,
         },
     )
 # ---------------------------------------------------------------------------
 # Correlated-alert chain generation
 # ---------------------------------------------------------------------------

     # Observable severity (noisy)
     visible_severity: float = add_observation_noise(true_severity, confidence)
+    # --- Extreme Outlier Logic (stochastic noise for score variance) ---
+    # Adds a 2% chance of a "rogue" alert that contradicts its indicators,
+    # ensuring that even perfect agents have some score variance < 1.0.
+    if np.random.random() < 0.02:
+        if true_severity >= 0.8:
+            visible_severity = float(np.random.uniform(0.0, 0.2))  # "Hidden Critical"
+        elif true_severity <= 0.2:
+            visible_severity = float(np.random.uniform(0.8, 1.0))  # "Phantom Critical"
     return Alert(
         id=alert_id,
         visible_severity=visible_severity,
         metadata={
             "false_positive": is_fp,
             "generated_at_step": step,
+            "is_outlier": True,  # mark for audit
         },
     )
 # ---------------------------------------------------------------------------
 # Correlated-alert chain generation
 # ---------------------------------------------------------------------------

tasks/easy.py CHANGED Viewed

@@ -60,7 +60,7 @@ _MEDIUM_ESCALATE_MIN: float      = 0.60   # ESCALATE acceptable above this
 _MEDIUM_IGNORE_MAX: float        = 0.50   # IGNORE acceptable below this
 # Pass threshold
-SUCCESS_THRESHOLD: float = 0.70
 # ---------------------------------------------------------------------------
@@ -161,14 +161,20 @@ class EasyTaskGrader:
     def get_episode_score(self) -> float:
         """
-        Return final normalised score in [0.0, 1.0].
-        Formula: correct_actions / total_actions
-        Returns 0.0 when no actions have been taken.
         """
         if self.total_actions == 0:
-            return 0.0
-        return self.correct_actions / self.total_actions
     def passed(self) -> bool:
         """Return True if the agent meets the easy-task success threshold."""

 _MEDIUM_IGNORE_MAX: float        = 0.50   # IGNORE acceptable below this
 # Pass threshold
+SUCCESS_THRESHOLD: float = 0.696
 # ---------------------------------------------------------------------------
     def get_episode_score(self) -> float:
         """
+        Return final normalised score in (0, 1).
+        Formula: 0.01 + 0.98 * (correct_actions / total_actions)
+        This ensures the score is always strictly between 0 and 1 as
+        required by the grading system.
         """
         if self.total_actions == 0:
+            return 0.01
+        raw = self.correct_actions / self.total_actions
+        # Enforce strict (0, 1) range
+        clamped = 0.01 + 0.98 * raw
+        return round(float(clamped), 6)
     def passed(self) -> bool:
         """Return True if the agent meets the easy-task success threshold."""

tasks/hard.py CHANGED Viewed

@@ -372,15 +372,13 @@ class HardTaskGrader:
     def get_episode_score(self) -> float:
         """
-        Return final normalised score in [0.0, 1.0].
         Formula:
-            chain_score  = Σ chain.outcome_score()  for all chains
-            max_chain    = Σ chain.max_possible()    for all chains
-            isolation    = min(isolation_correct * _ISOLATION_BONUS, cap)
-            raw          = (chain_score + isolation) / max(max_chain, 1.0)
             stability    = _stability_score(system_failures)
-            final        = max(0.0, min(raw * stability, 1.0))
         """
         # Chain component
         chain_score = sum(c.outcome_score() for c in self._chains.values())
@@ -396,8 +394,12 @@ class HardTaskGrader:
         raw = min((chain_score + isolation) / denominator, 1.0)
         stability = self._stability_score(self._system_failures)
-        final     = max(0.0, min(raw * stability, 1.0))
-        return round(final, 6)
     def passed(self) -> bool:
         """Return True if the agent meets the hard-task success threshold."""

     def get_episode_score(self) -> float:
         """
+        Return final normalised score in (0, 1).
         Formula:
+            chain_score  = Σ chain.outcome_score()
             stability    = _stability_score(system_failures)
+            base         = (raw * stability)
+            clamped      = 0.01 + 0.98 * base
         """
         # Chain component
         chain_score = sum(c.outcome_score() for c in self._chains.values())
         raw = min((chain_score + isolation) / denominator, 1.0)
         stability = self._stability_score(self._system_failures)
+        final_base = max(0.0, min(raw * stability, 1.0))
+        # Enforce strict (0, 1) range
+        clamped = 0.01 + 0.98 * final_base
+        return round(float(clamped), 6)
     def passed(self) -> bool:
         """Return True if the agent meets the hard-task success threshold."""

tasks/medium.py CHANGED Viewed

@@ -76,7 +76,7 @@ _CRITICAL_MISS_PENALTY_WEIGHT: float = 0.20
 # Filtering-bonus cap so ignoring FPs never inflates score above 1.0
 _FP_BONUS_CAP_PER_ALERT: float = 0.15
-SUCCESS_THRESHOLD: float = 0.55
 # ---------------------------------------------------------------------------
@@ -192,14 +192,15 @@ class MediumTaskGrader:
     def get_episode_score(self) -> float:
         """
-        Return final normalised score in [0.0, 1.0].
         Formula:
             raw   = resolved_score / max_possible_score
-            score = max(0.0, raw − fp_penalty − critical_miss_penalty)
         """
         if self._max_possible_score <= 0.0:
-            return 0.0
         # Normalised resolved quality
         raw = min(self._resolved_score / self._max_possible_score, 1.0)
@@ -218,8 +219,11 @@ class MediumTaskGrader:
             miss_rate = 0.0
         miss_penalty = _CRITICAL_MISS_PENALTY_WEIGHT * miss_rate
-        score = max(0.0, raw - fp_penalty - miss_penalty)
-        return round(score, 6)
     def passed(self) -> bool:
         """Return True if the agent meets the medium-task success threshold."""

 # Filtering-bonus cap so ignoring FPs never inflates score above 1.0
 _FP_BONUS_CAP_PER_ALERT: float = 0.15
+SUCCESS_THRESHOLD: float = 0.549
 # ---------------------------------------------------------------------------
     def get_episode_score(self) -> float:
         """
+        Return final normalised score in (0, 1).
         Formula:
             raw   = resolved_score / max_possible_score
+            base  = max(0.0, raw − fp_penalty − miss_penalty)
+            clamped = 0.01 + 0.98 * base
         """
         if self._max_possible_score <= 0.0:
+            return 0.01
         # Normalised resolved quality
         raw = min(self._resolved_score / self._max_possible_score, 1.0)
             miss_rate = 0.0
         miss_penalty = _CRITICAL_MISS_PENALTY_WEIGHT * miss_rate
+        base_score = max(0.0, raw - fp_penalty - miss_penalty)
+        # Enforce strict (0, 1) range
+        clamped = 0.01 + 0.98 * base_score
+        return round(float(clamped), 6)
     def passed(self) -> bool:
         """Return True if the agent meets the medium-task success threshold."""