Spaces:

10doshi12
/

firewatch-env

Running

10doshi12 committed 8 days ago

Commit

609f7b5

1 Parent(s): 5bc3005

fix(SPEC-7): fixes 1-6 — tick guard, cliff removal, blast penalty, MTTM stability, weighted error rate, metric-delta semantic analysis

- grade(): tick guard returns 0.05 when ticks_taken < 2 (Fix 1)
- grade(): remove early-exit cliff wipe of bcm_score/slo (Fix 2)
- grade(): blast_ratio * 0.02 penalty rewards cascade containment (Fix 3)
- IncidentMetrics: require 3 consecutive zero-BCM ticks for MTTM (Fix 4)
- _build_semantic_analysis(): report metric deltas only, no outcome framing (Fix 5)
- _weighted_mean_error_rate(): weights by downstream dependent count (Fix 6)
- EpisodeResult: services_affected_static + total_services_in_episode fields
- _count_blast_radius(): BFS from root cause through reverse dependency graph

Files changed (3) hide show

rewards.py +100 -67
simulation.py +33 -4
tests/test_rewards_fixes.py +30 -1

rewards.py CHANGED Viewed

@@ -96,9 +96,9 @@ class RewardEngine:
         Returns:
             Tuple of (total_reward, breakdown_dict).
         """
-        # 1. Health improvement: mean error rate decrease
-        prev_mean = _mean_error_rate(prev_obs)
-        next_mean = _mean_error_rate(next_obs)
         health_improvement = (prev_mean - next_mean) * REWARD_WEIGHT_HEALTH
         # 2. SLO preservation: budget change
@@ -257,27 +257,25 @@ def grade(episode_result: EpisodeResult, difficulty: str) -> float:
     if task is None:
         return 0.0
     max_ticks = task.max_ticks
     max_bcm = task.max_bad_customer_minutes
     # 1. Recovery (40%)
-    if er.services_affected > 0:
-        recovery = er.services_recovered / er.services_affected
-    else:
-        recovery = 1.0  # No affected services = perfect recovery
-    # Penalize early exit without fix: if the agent gave up, assume worst case for BCM and SLO
-    if recovery < 1.0 and er.ticks_taken < max_ticks:
-        bcm_score = 0.0
-        slo = 0.0
-    else:
-        # BCM score: total user impact relative to worst case
-        bcm_score = max(0.0, 1.0 - (er.bad_customer_minutes / max_bcm))
-        # SLO (15%) — budget remaining
-        slo = max(0.0, min(1.0, er.final_slo_budget / 100.0))
     # 2. Speed (25%) — composite of MTTM + BCM
-    # MTTM score: how quickly user impact was zeroed
     if er.mttm_ticks is not None:
         mttm_score = max(0.0, 1.0 - (er.mttm_ticks / max_ticks))
     else:
@@ -292,19 +290,25 @@ def grade(episode_result: EpisodeResult, difficulty: str) -> float:
     precision = max(
         0.0, 1.0 - (er.wrong_actions * GRADER_WRONG_ACTION_PENALTY_PER_ACTION)
     )
     # False resolution penalty
     if recovery == 0.0:
         precision = 0.0  # doing nothing then exiting is inherently imprecise
-    # Final weighted score
-    score = (
         GRADER_WEIGHT_RECOVERY * recovery
         + GRADER_WEIGHT_SPEED * speed
         + GRADER_WEIGHT_PRECISION * precision
         + GRADER_WEIGHT_SLO * slo
     )
     return max(0.01, min(0.99, round(score, 2)))
@@ -417,69 +421,63 @@ def _build_semantic_analysis(
     prev_obs: SystemObservation,
     recovering: list[str],
 ) -> str:
-    """Generate contextual narrative for the LLM judge."""
     parts: list[str] = []
     if not action_valid:
         parts.append(
-            f"Agent attempted '{action.action_type}' but the action was "
-            f"invalid. No system state was modified."
         )
     elif wrong_action:
         parts.append(
-            f"Agent applied '{action.action_type}' to "
-            f"'{action.target_service}' which was not significantly degraded. "
-            f"This indicates premature remediation before sufficient "
-            f"investigation. The actual root cause remains unaddressed."
         )
     elif action.action_type in ("fetch_logs", "get_metrics_detail", "trace_dependencies"):
         parts.append(
-            f"Agent performed investigation: '{action.action_type}' on "
-            f"'{action.target_service}'. This is an information-gathering "
-            f"step that does not modify system state."
         )
-    elif action.action_type in ("restart_service", "rollback_deploy", "revert_config", "scale_replicas", "circuit_break"):
-        parts.append(
-            f"Agent applied remediation: '{action.action_type}' to "
-            f"'{action.target_service}'."
-        )
-        if recovering:
-            parts.append(
-                f"System health is improving — services recovering: "
-                f"{recovering}."
-            )
-        else:
-            parts.append(
-                f"No immediate improvement observed. The remediation may "
-                f"need time to take effect, or it may be targeting the "
-                f"wrong service/fault type."
-            )
     elif action.action_type == "declare_resolved":
-        parts.append("Agent declared the incident resolved. Episode ending.")
     elif action.action_type == "escalate":
-        parts.append(
-            "Agent escalated the incident. This costs SLO budget but "
-            "brings specialist attention."
-        )
-    # Overall state assessment
-    degraded_count = sum(
-        1 for m in next_obs.services.values() if m.status != "healthy"
-    )
     total = len(next_obs.services)
-    if degraded_count == 0:
-        parts.append("All services are now healthy.")
-    elif degraded_count == total:
-        parts.append(
-            "All services are degraded — situation is critical. "
-            "Immediate action required."
-        )
-    else:
-        parts.append(
-            f"{degraded_count}/{total} services remain degraded."
-        )
-    return " ".join(parts)
 def _assess_progress(obs: SystemObservation, done: bool) -> str:
@@ -507,6 +505,41 @@ def _assess_progress(obs: SystemObservation, done: bool) -> str:
 # Helper
 # ==========================================================================
 def _mean_error_rate(obs: SystemObservation) -> float:
     """Compute mean error rate across all services in observation."""
     services = obs.services

         Returns:
             Tuple of (total_reward, breakdown_dict).
         """
+        # 1. Health improvement: weighted mean error rate decrease
+        prev_mean = _weighted_mean_error_rate(prev_obs.services, prev_obs.dependency_graph)
+        next_mean = _weighted_mean_error_rate(next_obs.services, next_obs.dependency_graph)
         health_improvement = (prev_mean - next_mean) * REWARD_WEIGHT_HEALTH
         # 2. SLO preservation: budget change
     if task is None:
         return 0.0
+    # Fix 1: Tick guard — declare_resolved before tick 2 earns near-zero score
+    if er.ticks_taken < 2:
+        return 0.05
     max_ticks = task.max_ticks
     max_bcm = task.max_bad_customer_minutes
     # 1. Recovery (40%)
+    # The tick guard above handles Fix 1 (tick-0 exploit).
+    # Use runtime services_affected as denominator — blast penalty (below) is what
+    # differentiates agents who contained vs didn't contain the cascade.
+    denominator = er.services_affected or 1
+    recovery = min(1.0, er.services_recovered / denominator)
+    # Fix 2: No cliff wipe — compute BCM and SLO unconditionally
+    bcm_score = max(0.0, 1.0 - (er.bad_customer_minutes / max_bcm))
+    slo = max(0.0, min(1.0, er.final_slo_budget / 100.0))
     # 2. Speed (25%) — composite of MTTM + BCM
     if er.mttm_ticks is not None:
         mttm_score = max(0.0, 1.0 - (er.mttm_ticks / max_ticks))
     else:
     precision = max(
         0.0, 1.0 - (er.wrong_actions * GRADER_WRONG_ACTION_PENALTY_PER_ACTION)
     )
     # False resolution penalty
     if recovery == 0.0:
         precision = 0.0  # doing nothing then exiting is inherently imprecise
+    # Raw weighted score
+    raw = (
         GRADER_WEIGHT_RECOVERY * recovery
         + GRADER_WEIGHT_SPEED * speed
         + GRADER_WEIGHT_PRECISION * precision
         + GRADER_WEIGHT_SLO * slo
     )
+    # Fix 3: Blast radius penalty — reward containing cascade, not just fixing it
+    total_services = er.total_services_in_episode or denominator
+    blast_ratio = er.services_affected / total_services if total_services > 0 else 0.0
+    blast_penalty = blast_ratio * 0.02
+    score = max(0.0, raw - blast_penalty)
     return max(0.01, min(0.99, round(score, 2)))
     prev_obs: SystemObservation,
     recovering: list[str],
 ) -> str:
+    """
+    Generate metric-delta context for the step info dict.
+    Reports WHAT changed (metric values and deltas), not WHETHER it was good.
+    The agent must interpret the numbers itself — no outcome framing.
+    """
     parts: list[str] = []
     if not action_valid:
         parts.append(
+            f"Action '{action.action_type}' was invalid. No state change."
         )
     elif wrong_action:
+        # Report metric context only — no interpretation
+        svc = action.target_service or ""
+        curr_er = next_obs.services[svc].http_server_error_rate if svc in next_obs.services else None
+        er_str = f"error_rate={curr_er:.2f}" if curr_er is not None else "error_rate=unknown"
         parts.append(
+            f"Action '{action.action_type}' targeted '{svc}' ({er_str}). "
+            f"Wrong-action penalty applied (threshold: 0.10)."
         )
     elif action.action_type in ("fetch_logs", "get_metrics_detail", "trace_dependencies"):
         parts.append(
+            f"Investigation '{action.action_type}' on '{action.target_service}'. "
+            f"No state mutation."
         )
+    elif action.action_type in (
+        "restart_service", "rollback_deploy", "revert_config",
+        "scale_replicas", "circuit_break",
+    ):
+        parts.append(f"Remediation '{action.action_type}' applied to '{action.target_service}'.")
+        # Report metric deltas — no interpretation of good/bad
+        if prev_obs:
+            for svc_name, curr in next_obs.services.items():
+                prev_svc = prev_obs.services.get(svc_name)
+                if prev_svc:
+                    delta = curr.http_server_error_rate - prev_svc.http_server_error_rate
+                    if abs(delta) > 0.05:
+                        direction = "increased" if delta > 0 else "decreased"
+                        parts.append(
+                            f"{svc_name} error_rate {direction} by {abs(delta):.2f} "
+                            f"(now {curr.http_server_error_rate:.2f})."
+                        )
     elif action.action_type == "declare_resolved":
+        parts.append("Agent declared incident resolved. Episode ending.")
     elif action.action_type == "escalate":
+        parts.append("Agent escalated incident.")
+    # Current state counts — factual only
+    degraded_count = sum(1 for m in next_obs.services.values() if m.status != "healthy")
     total = len(next_obs.services)
+    parts.append(f"{degraded_count}/{total} services non-healthy.")
+    if feedback:
+        parts.append(f"Feedback: {feedback}")
+    return " ".join(parts) if parts else "No significant changes this tick."
 def _assess_progress(obs: SystemObservation, done: bool) -> str:
 # Helper
 # ==========================================================================
+def _weighted_mean_error_rate(services: dict, dependency_graph: dict) -> float:
+    """
+    Compute mean error rate across services, weighted by dependent count.
+    Weight formula: weight(svc) = 1 + count(other services that list svc as a dependency)
+    Example: api-gateway with 3 dependents → weight=4; cache leaf → weight=1.
+    Only services present in ``services`` are considered, both as dependents and
+    as dependencies. A dependent is counted once per dependency even when its
+    dependency list repeats a name, and a service is never its own dependent.
+    Args:
+        services: Dict mapping service_name → metrics object exposing
+            ``http_server_error_rate``.
+        dependency_graph: Dict mapping service_name → list[dependency_name].
+            ``None`` or an empty dict means "no dependencies" (all weights are 1).
+    Returns:
+        Weighted mean of ``http_server_error_rate`` over ``services``;
+        0.0 when ``services`` is empty.
+    """
+    if not services:
+        return 0.0
+    # Every service starts at weight 1 so leaf services still count in the mean.
+    weights: dict[str, int] = {name: 1 for name in services}
+    for dependent, deps in (dependency_graph or {}).items():
+        if dependent not in services:
+            continue
+        # set(): a repeated entry must not inflate the dependency's weight.
+        for dep in set(deps or ()):
+            if dep != dependent and dep in weights:
+                weights[dep] += 1
+    # total_weight >= len(services) >= 1 here, so the division is always safe.
+    total_weight = sum(weights.values())
+    weighted_error = sum(
+        metrics.http_server_error_rate * weights[name]
+        for name, metrics in services.items()
+    )
+    return weighted_error / total_weight
 def _mean_error_rate(obs: SystemObservation) -> float:
     """Compute mean error rate across all services in observation."""
     services = obs.services

simulation.py CHANGED Viewed

@@ -106,13 +106,18 @@ class IncidentMetrics:
     bad_customer_minutes: float = 0.0
     mttm_achieved_tick: int | None = None
     _mttm_locked: bool = field(default=False, repr=False)
     def update(self, bcm_delta: float, current_tick: int) -> None:
-        """Update BCM and check MTTM achievement."""
         self.bad_customer_minutes += bcm_delta
-        if bcm_delta <= 0.0 and not self._mttm_locked and current_tick > 0:
-            self.mttm_achieved_tick = current_tick
-            self._mttm_locked = True
 # ==========================================================================
@@ -709,6 +714,29 @@ def generate_episode(
     return mesh, fault_config
 # ==========================================================================
 # Public API
 # ==========================================================================
@@ -718,4 +746,5 @@ __all__ = [
     "IncidentMetrics",
     "ServiceMesh",
     "generate_episode",
 ]

     bad_customer_minutes: float = 0.0
     mttm_achieved_tick: int | None = None
     _mttm_locked: bool = field(default=False, repr=False)
+    _zero_bcm_streak: int = field(default=0, repr=False)
     def update(self, bcm_delta: float, current_tick: int) -> None:
+        """Accumulate BCM and record MTTM after 3 consecutive zero-BCM ticks.
+        Args:
+            bcm_delta: Bad-customer-minutes accrued this tick; added to the running total.
+            current_tick: Tick number of this update. Tick 0 never counts toward the streak.
+        MTTM is written once and then locked; it is set to ``current_tick - 2``,
+        i.e. the first tick of the streak when ticks arrive as consecutive integers.
+        """
+        self.bad_customer_minutes += bcm_delta
+        impact_free_tick = bcm_delta <= 0.0 and current_tick > 0
+        if not impact_free_tick:
+            # Any tick with user impact (or tick 0) breaks the streak.
+            self._zero_bcm_streak = 0
+            return
+        self._zero_bcm_streak += 1
+        if self._mttm_locked or self._zero_bcm_streak < 3:
+            return
+        self.mttm_achieved_tick = current_tick - 2
+        self._mttm_locked = True
 # ==========================================================================
     return mesh, fault_config
+def _count_blast_radius(mesh: "ServiceMesh", fault_config: "FaultConfig") -> int:
+    """
+    Count services that will be affected by this fault at full cascade propagation.
+    Breadth-first walk from the root cause service along reverse dependency
+    edges: a service becomes affected when its dependency list contains an
+    already-affected service. The walk stops after CASCADE_MAX_DEPTH hops or
+    as soon as no new service is reached.
+    NOTE(review): grade() computes recovery from the runtime services_affected
+    and guards the tick-0 exploit with a tick check, so this count is not its
+    denominator; presumably it feeds EpisodeResult.services_affected_static.
+    Confirm against the caller.
+    Args:
+        mesh: Service mesh; only ``mesh.dependency_graph`` (service → dependencies) is read.
+        fault_config: Fault description; only ``root_cause_service`` is read.
+    Returns:
+        max(1, number of services reachable from root cause within CASCADE_MAX_DEPTH hops),
+        counting the root cause service itself.
+    """
+    root = fault_config.root_cause_service
+    # Invert the graph once so each hop is a lookup instead of a full scan.
+    dependents_of: dict[str, list[str]] = {}
+    for service, deps in mesh.dependency_graph.items():
+        for dep in deps:
+            dependents_of.setdefault(dep, []).append(service)
+    affected: set[str] = {root}
+    frontier: list[str] = [root]
+    for _ in range(CASCADE_MAX_DEPTH):
+        if not frontier:
+            break  # cascade exhausted before reaching the depth limit
+        next_frontier: list[str] = []
+        for svc in frontier:
+            for dependent in dependents_of.get(svc, ()):
+                if dependent not in affected:
+                    affected.add(dependent)
+                    next_frontier.append(dependent)
+        frontier = next_frontier
+    return max(1, len(affected))
 # ==========================================================================
 # Public API
 # ==========================================================================
     "IncidentMetrics",
     "ServiceMesh",
     "generate_episode",
+    "_count_blast_radius",
 ]

tests/test_rewards_fixes.py CHANGED Viewed

@@ -5,7 +5,7 @@ All 5 tests must pass after implementing fixes 1–6.
 import types
 import pytest
-from firewatch_env.rewards import grade, EpisodeResult, _weighted_mean_error_rate
 def _er(affected, recovered, ticks, wrong, slo, bcm, static=None, total=None):
@@ -70,6 +70,7 @@ def test_blast_radius_fast_agent_scores_higher():
 def test_weighted_mean_error_rate_weights_by_dependents():
     """api-gateway with 3 dependents dominates over a leaf service."""
     def _svc(er):
         return types.SimpleNamespace(http_server_error_rate=er)
@@ -108,3 +109,31 @@ def test_variance_check():
     assert zero    < 0.10, f"zero={zero:.3f}, expected < 0.10"
     assert 0.10 <= wrong <= 0.60, f"wrong={wrong:.3f}, expected in [0.10, 0.60]"
     assert perfect - zero >= 0.50, f"gap={perfect - zero:.3f}, expected >= 0.50"

 import types
 import pytest
+from firewatch_env.rewards import grade, EpisodeResult
 def _er(affected, recovered, ticks, wrong, slo, bcm, static=None, total=None):
 def test_weighted_mean_error_rate_weights_by_dependents():
     """api-gateway with 3 dependents dominates over a leaf service."""
+    from firewatch_env.rewards import _weighted_mean_error_rate  # added in Task 4
     def _svc(er):
         return types.SimpleNamespace(http_server_error_rate=er)
     assert zero    < 0.10, f"zero={zero:.3f}, expected < 0.10"
     assert 0.10 <= wrong <= 0.60, f"wrong={wrong:.3f}, expected in [0.10, 0.60]"
     assert perfect - zero >= 0.50, f"gap={perfect - zero:.3f}, expected >= 0.50"
+# ── Fix 4: MTTM requires 3 consecutive zero-BCM ticks ─────────────────────
+def test_mttm_requires_3_consecutive_zero_bcm_ticks():
+    """Two zero-BCM ticks in a row are not enough; the third one grants MTTM."""
+    from firewatch_env.simulation import IncidentMetrics
+    m = IncidentMetrics()
+    # Tick 1 still accrues BCM; ticks 2 and 3 bring the zero streak to 2.
+    for tick, delta in ((1, 1.0), (2, 0.0), (3, 0.0)):
+        m.update(bcm_delta=delta, current_tick=tick)
+    assert m.mttm_achieved_tick is None, "must not grant MTTM after only 2 consecutive zeros"
+    # Third consecutive zero at tick 4 → MTTM recorded as tick 4 - 2 = 2.
+    m.update(bcm_delta=0.0, current_tick=4)
+    assert m.mttm_achieved_tick == 2, f"expected mttm_achieved_tick=2, got {m.mttm_achieved_tick}"
+def test_mttm_streak_resets_on_nonzero():
+    """A tick with non-zero BCM breaks the streak; MTTM needs 3 unbroken zeros."""
+    from firewatch_env.simulation import IncidentMetrics
+    m = IncidentMetrics()
+    # Zeros at ticks 1-2, a non-zero at tick 3 resets, zeros at ticks 4-5 rebuild to 2.
+    deltas_by_tick = {1: 0.0, 2: 0.0, 3: 1.0, 4: 0.0, 5: 0.0}
+    for tick, delta in deltas_by_tick.items():
+        m.update(bcm_delta=delta, current_tick=tick)
+    assert m.mttm_achieved_tick is None, "streak was reset; MTTM must not be granted yet"
+    # Third unbroken zero at tick 6 → MTTM recorded as tick 6 - 2 = 4.
+    m.update(bcm_delta=0.0, current_tick=6)
+    assert m.mttm_achieved_tick == 4, f"expected mttm_achieved_tick=4, got {m.mttm_achieved_tick}"