Spaces:

Naseer-010
/

DIME

Configuration error

App Files Files Community

Naseer-010 committed on Apr 8

Commit

d58554d

1 Parent(s): 4f9312c

debugging

Browse files

Files changed (4) hide show

inference.py +51 -24
server/environment.py +20 -20
server/models.py +5 -13
server/tasks.py +26 -17

inference.py CHANGED Viewed

@@ -31,10 +31,7 @@ TASKS = ["traffic_spike", "node_failure", "cascading_failure", "flash_crowd"]
 MAX_RETRIES = 3
 BENCHMARK = "distributed_infra_env"
-client = OpenAI(
-    base_url=API_BASE_URL,
-    api_key=API_KEY
-)
 SYSTEM_PROMPT = """You are an expert Site Reliability Engineer (SRE).
 You receive observations about the system state as JSON and must respond with a single action as JSON.
@@ -65,25 +62,40 @@ Respond with ONLY a valid JSON action object. No markdown formatting, and no oth
 # Required Logging Functions
 # ---------------------------------------------------------------------------
 def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
-def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
     error_val = error if error else "null"
     done_val = str(done).lower()
-    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
 def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
-    print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
 # ---------------------------------------------------------------------------
 # Core Logic
 # ---------------------------------------------------------------------------
 def llm_decide(observation: dict) -> dict:
     obs_str = json.dumps(observation)
-    user_prompt = f"Current system state:\n{obs_str}\nRespond with ONLY a JSON action object."
     for attempt in range(MAX_RETRIES):
         try:
@@ -104,14 +116,19 @@ def llm_decide(observation: dict) -> dict:
             return json.loads(content)
         except Exception as e:
             # FIX: Print the actual error so it shows up in logs!
-            print(f"[DEBUG] LLM call attempt {attempt+1} failed: {str(e)}", flush=True)
             time.sleep(1)
     # If it fails all retries, return a no_op
     return {"action_type": "no_op"}
 def env_reset(task_id: str) -> dict:
-    response = requests.post(f"{ENV_SERVER_URL}/reset", json={"task": task_id}, timeout=10)
     response.raise_for_status()
     payload = response.json()
     data_block = payload.get("data", payload)
@@ -119,14 +136,18 @@ def env_reset(task_id: str) -> dict:
         return data_block["observation"]
     return data_block
 def env_step(action: dict) -> dict:
-    response = requests.post(f"{ENV_SERVER_URL}/step", json={"action": action}, timeout=10)
     response.raise_for_status()
     return response.json()
 def run_task(task_id: str) -> float:
     log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
     try:
         obs = env_reset(task_id)
     except Exception as e:
@@ -135,18 +156,18 @@ def run_task(task_id: str) -> float:
     step = 0
     rewards_list = []
-    # Initialize task_score outside the loop so we always have a value
     # even if the loop breaks early or errors out.
     task_score = 0.0
     while True:
         step += 1
         action = llm_decide(obs)
         # Format action strictly on one line without quotes that break bash/parsing
-        action_str = json.dumps(action).replace('"', "'")
         error_msg = None
         reward = 0.0
         done = False
@@ -155,35 +176,41 @@ def run_task(task_id: str) -> float:
             # ---> THE CHANGES YOU ASKED ABOUT ARE HERE <---
             result = env_step(action)
             data_block = result.get("data", result)
-            if "observation" in data_block and isinstance(data_block["observation"], dict):
                 obs = data_block["observation"]
             else:
                 obs = data_block
             reward = float(data_block.get("reward", obs.get("reward", 0.0)))
             done = bool(data_block.get("done", obs.get("done", False)))
             # This continuously updates the task_score on every single step.
             task_score = float(obs.get("task_score", 0.0))
         except Exception as e:
-            error_msg = str(e).replace("\n", " ") # Prevent newline breaks in STDOUT
             done = True
         rewards_list.append(reward)
-        log_step(step=step, action=action_str, reward=reward, done=done, error=error_msg)
         # Even if step > 100 hits (timeout failure), task_score has the partial credit from the last step!
         if done or step > 100:
             # Define success: a task_score of at least 0.1 counts as partial success
-            success = task_score >= 0.1
             log_end(success=success, steps=step, score=task_score, rewards=rewards_list)
             return task_score
 def main():
     for task_id in TASKS:
         run_task(task_id)
 if __name__ == "__main__":
     main()

 MAX_RETRIES = 3
 BENCHMARK = "distributed_infra_env"
+client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
 SYSTEM_PROMPT = """You are an expert Site Reliability Engineer (SRE).
 You receive observations about the system state as JSON and must respond with a single action as JSON.
 # Required Logging Functions
 # ---------------------------------------------------------------------------
 def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
+def log_step(
+    step: int, action: str, reward: float, done: bool, error: Optional[str]
+) -> None:
     error_val = error if error else "null"
     done_val = str(done).lower()
+    print(
+        f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
+        flush=True,
+    )
 def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
+    print(
+        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
+        flush=True,
+    )
 # ---------------------------------------------------------------------------
 # Core Logic
 # ---------------------------------------------------------------------------
 def llm_decide(observation: dict) -> dict:
     obs_str = json.dumps(observation)
+    user_prompt = (
+        f"Current system state:\n{obs_str}\nRespond with ONLY a JSON action object."
+    )
     for attempt in range(MAX_RETRIES):
         try:
             return json.loads(content)
         except Exception as e:
             # FIX: Print the actual error so it shows up in logs!
+            print(
+                f"[DEBUG] LLM call attempt {attempt + 1} failed: {str(e)}", flush=True
+            )
             time.sleep(1)
     # If it fails all retries, return a no_op
     return {"action_type": "no_op"}
 def env_reset(task_id: str) -> dict:
+    response = requests.post(
+        f"{ENV_SERVER_URL}/reset", json={"task": task_id}, timeout=10
+    )
     response.raise_for_status()
     payload = response.json()
     data_block = payload.get("data", payload)
         return data_block["observation"]
     return data_block
 def env_step(action: dict) -> dict:
+    response = requests.post(
+        f"{ENV_SERVER_URL}/step", json={"action": action}, timeout=10
+    )
     response.raise_for_status()
     return response.json()
 def run_task(task_id: str) -> float:
     log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
     try:
         obs = env_reset(task_id)
     except Exception as e:
     step = 0
     rewards_list = []
+    # Initialize task_score outside the loop so we always have a value
     # even if the loop breaks early or errors out.
     task_score = 0.0
     while True:
         step += 1
         action = llm_decide(obs)
         # Format action strictly on one line without quotes that break bash/parsing
+        action_str = json.dumps(action).replace('"', "'")
         error_msg = None
         reward = 0.0
         done = False
             # ---> THE CHANGES YOU ASKED ABOUT ARE HERE <---
             result = env_step(action)
             data_block = result.get("data", result)
+            if "observation" in data_block and isinstance(
+                data_block["observation"], dict
+            ):
                 obs = data_block["observation"]
             else:
                 obs = data_block
             reward = float(data_block.get("reward", obs.get("reward", 0.0)))
             done = bool(data_block.get("done", obs.get("done", False)))
             # This continuously updates the task_score on every single step.
             task_score = float(obs.get("task_score", 0.0))
         except Exception as e:
+            error_msg = str(e).replace("\n", " ")  # Prevent newline breaks in STDOUT
             done = True
         rewards_list.append(reward)
+        log_step(
+            step=step, action=action_str, reward=reward, done=done, error=error_msg
+        )
         # Even if step > 100 hits (timeout failure), task_score has the partial credit from the last step!
         if done or step > 100:
             # Define success: a task_score of at least 0.1 counts as partial success
+            success = task_score >= 0.1
             log_end(success=success, steps=step, score=task_score, rewards=rewards_list)
             return task_score
 def main():
     for task_id in TASKS:
         run_task(task_id)
 if __name__ == "__main__":
     main()

server/environment.py CHANGED Viewed

@@ -23,6 +23,7 @@ def _get_tasks():
     global _TASKS
     if _TASKS is None:
         from server.tasks import TASKS
         _TASKS = TASKS
     return _TASKS
@@ -41,10 +42,10 @@ class Node:
     capacity: int = 15
     is_failed: bool = False
     memory_util: float = 0.2
-    high_cpu_streak: int = 0       # consecutive steps above 90% CPU
-    restart_countdown: int = 0     # >0 means the node is restarting
-    is_temporary: bool = False     # True for scale-up nodes
-    ttl: int = 0                   # remaining lifetime for temp nodes
 @dataclass
@@ -56,9 +57,9 @@ class SimulationState:
     step_count: int = 0
     base_request_rate: float = 100.0
     current_request_rate: float = 100.0
-    throttle_rate: float = 1.0      # 1.0 = accept all
     latency_ms: float = 20.0
-    actions_taken: int = 0          # non-no_op actions
     cascade_bonus_awarded: bool = False
     task_id: str = ""
     max_steps: int = 30
@@ -74,6 +75,7 @@ class SimulationState:
 # Default graph topology: 8 nodes in a mesh-like structure
 # ---------------------------------------------------------------------------
 def _build_default_graph(n: int = 8) -> Tuple[List[Node], Dict[int, List[int]]]:
     """Create a default mesh-like graph of n nodes."""
     nodes = [Node(cpu_util=0.25 + random.uniform(-0.05, 0.05)) for _ in range(n)]
@@ -175,7 +177,7 @@ class DistributedInfraEnvironment(Environment):
             episode_id=eid,
             step_count=0,
             task_id=task_id,
-            task_score=0.0,
         )
         return self._make_observation()
@@ -224,7 +226,7 @@ class DistributedInfraEnvironment(Environment):
         # 10. Check termination
         tasks = _get_tasks()
         done = sim.step_count >= sim.max_steps
-        task_score = 0.0
         if sim.task_id in tasks:
             task_info = tasks[sim.task_id]
             if task_info["is_done"](self):
@@ -293,7 +295,8 @@ class DistributedInfraEnvironment(Environment):
                     for neighbor_idx in sim.adjacency.get(src, []):
                         if (
                             not sim.nodes[neighbor_idx].is_failed
-                            and sim.nodes[neighbor_idx].cpu_util > CASCADE_AWARENESS_THRESHOLD
                         ):
                             sim.cascade_bonus_awarded = True
                             break
@@ -310,9 +313,7 @@ class DistributedInfraEnvironment(Environment):
             sim.nodes.append(new_node)
             # Connect to a few existing nodes
             sim.adjacency[new_idx] = []
-            connect_to = self._rng.sample(
-                range(new_idx), min(3, new_idx)
-            )
             for c in connect_to:
                 sim.adjacency[new_idx].append(c)
                 sim.adjacency[c].append(new_idx)
@@ -385,9 +386,7 @@ class DistributedInfraEnvironment(Environment):
             new_adj: Dict[int, List[int]] = {}
             for k, v in sim.adjacency.items():
                 new_k = k if k < idx else k - 1
-                new_v = [
-                    (x if x < idx else x - 1) for x in v if x != idx
-                ]
                 new_adj[new_k] = new_v
             sim.adjacency = new_adj
@@ -411,7 +410,10 @@ class DistributedInfraEnvironment(Environment):
                 0.05,
                 min(
                     1.0,
-                    node.cpu_util + cpu_from_queue + cpu_from_processing - natural_decay
                     + self._rng.uniform(-0.02, 0.02),
                 ),
             )
@@ -439,7 +441,7 @@ class DistributedInfraEnvironment(Environment):
         # Latency model: base + queue component + CPU-pressure component
         base_latency = 10.0
         queue_latency = avg_queue * 1.5
-        cpu_latency = (avg_cpu ** 2) * 80.0  # non-linear increase under load
         new_latency = base_latency + queue_latency + cpu_latency
         # Exponential moving average
@@ -523,9 +525,7 @@ class DistributedInfraEnvironment(Environment):
         normalized_latency = min(2.0, sim.latency_ms / TARGET_LATENCY_MS)
         overloaded = sum(
-            1
-            for n in sim.nodes
-            if not n.is_failed and n.cpu_util > OVERLOAD_THRESHOLD
         )
         overload_fraction = overloaded / total

     global _TASKS
     if _TASKS is None:
         from server.tasks import TASKS
         _TASKS = TASKS
     return _TASKS
     capacity: int = 15
     is_failed: bool = False
     memory_util: float = 0.2
+    high_cpu_streak: int = 0  # consecutive steps above 90% CPU
+    restart_countdown: int = 0  # >0 means the node is restarting
+    is_temporary: bool = False  # True for scale-up nodes
+    ttl: int = 0  # remaining lifetime for temp nodes
 @dataclass
     step_count: int = 0
     base_request_rate: float = 100.0
     current_request_rate: float = 100.0
+    throttle_rate: float = 1.0  # 1.0 = accept all
     latency_ms: float = 20.0
+    actions_taken: int = 0  # non-no_op actions
     cascade_bonus_awarded: bool = False
     task_id: str = ""
     max_steps: int = 30
 # Default graph topology: 8 nodes in a mesh-like structure
 # ---------------------------------------------------------------------------
 def _build_default_graph(n: int = 8) -> Tuple[List[Node], Dict[int, List[int]]]:
     """Create a default mesh-like graph of n nodes."""
     nodes = [Node(cpu_util=0.25 + random.uniform(-0.05, 0.05)) for _ in range(n)]
             episode_id=eid,
             step_count=0,
             task_id=task_id,
+            task_score=0.01,
         )
         return self._make_observation()
         # 10. Check termination
         tasks = _get_tasks()
         done = sim.step_count >= sim.max_steps
+        task_score = 0.01
         if sim.task_id in tasks:
             task_info = tasks[sim.task_id]
             if task_info["is_done"](self):
                     for neighbor_idx in sim.adjacency.get(src, []):
                         if (
                             not sim.nodes[neighbor_idx].is_failed
+                            and sim.nodes[neighbor_idx].cpu_util
+                            > CASCADE_AWARENESS_THRESHOLD
                         ):
                             sim.cascade_bonus_awarded = True
                             break
             sim.nodes.append(new_node)
             # Connect to a few existing nodes
             sim.adjacency[new_idx] = []
+            connect_to = self._rng.sample(range(new_idx), min(3, new_idx))
             for c in connect_to:
                 sim.adjacency[new_idx].append(c)
                 sim.adjacency[c].append(new_idx)
             new_adj: Dict[int, List[int]] = {}
             for k, v in sim.adjacency.items():
                 new_k = k if k < idx else k - 1
+                new_v = [(x if x < idx else x - 1) for x in v if x != idx]
                 new_adj[new_k] = new_v
             sim.adjacency = new_adj
                 0.05,
                 min(
                     1.0,
+                    node.cpu_util
+                    + cpu_from_queue
+                    + cpu_from_processing
+                    - natural_decay
                     + self._rng.uniform(-0.02, 0.02),
                 ),
             )
         # Latency model: base + queue component + CPU-pressure component
         base_latency = 10.0
         queue_latency = avg_queue * 1.5
+        cpu_latency = (avg_cpu**2) * 80.0  # non-linear increase under load
         new_latency = base_latency + queue_latency + cpu_latency
         # Exponential moving average
         normalized_latency = min(2.0, sim.latency_ms / TARGET_LATENCY_MS)
         overloaded = sum(
+            1 for n in sim.nodes if not n.is_failed and n.cpu_util > OVERLOAD_THRESHOLD
         )
         overload_fraction = overloaded / total

server/models.py CHANGED Viewed

@@ -68,9 +68,7 @@ class InfraObservation(Observation):
     cpu_loads: List[float] = Field(
         description="CPU utilization [0.0, 1.0] for each node."
     )
-    queue_lengths: List[int] = Field(
-        description="Number of pending requests per node."
-    )
     failed_nodes: List[int] = Field(
         description="Indices of nodes currently in failed state."
     )
@@ -80,15 +78,11 @@ class InfraObservation(Observation):
     request_rate: float = Field(
         description="Incoming requests per second into the system."
     )
-    step: int = Field(
-        description="Current step within the episode."
-    )
     task_hint: str = Field(
         description="Natural language description of the current task objective."
     )
-    task_score: float = Field(
-        default=0.0, description="Current grader score"
-    )
 class InfraState(State):
@@ -96,9 +90,7 @@ class InfraState(State):
     Internal environment state extending the base OpenEnv State.
     """
-    task_id: Optional[str] = Field(
-        default=None, description="Current task identifier."
-    )
     task_score: float = Field(
-        default=0.0, description="Current task grader score [0.0, 1.0]."
     )

     cpu_loads: List[float] = Field(
         description="CPU utilization [0.0, 1.0] for each node."
     )
+    queue_lengths: List[int] = Field(description="Number of pending requests per node.")
     failed_nodes: List[int] = Field(
         description="Indices of nodes currently in failed state."
     )
     request_rate: float = Field(
         description="Incoming requests per second into the system."
     )
+    step: int = Field(description="Current step within the episode.")
     task_hint: str = Field(
         description="Natural language description of the current task objective."
     )
+    task_score: float = Field(default=0.01, description="Current grader score")
 class InfraState(State):
     Internal environment state extending the base OpenEnv State.
     """
+    task_id: Optional[str] = Field(default=None, description="Current task identifier.")
     task_score: float = Field(
+        default=0.01, description="Current task grader score in (0.0, 1.0) strictly."
     )

server/tasks.py CHANGED Viewed

@@ -4,7 +4,7 @@ Distributed Infrastructure Management Environment.
 Each task provides:
     - setup(env, rng): configure initial node states and scenario parameters
-    - grade(env): return float in [0.0, 1.0] with partial credit
     - is_done(env): termination condition check
     - hint: natural language task description for the agent
 """
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
 # Task 1 — Easy: Traffic Spike Recovery
 # ============================================================================
 def _setup_traffic_spike(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """System receives 3x normal request rate."""
     sim = env.sim
@@ -38,7 +39,7 @@ def _grade_traffic_spike(env: "DistributedInfraEnvironment") -> float:
     """
     sim = env.sim
     if not sim.latency_history:
-        return 0.0
     # Latency component: fraction of steps where latency was below target
     target = 50.0  # ms
@@ -46,14 +47,16 @@ def _grade_traffic_spike(env: "DistributedInfraEnvironment") -> float:
     latency_score = below_target / len(sim.latency_history)
     # Uptime component: average uptime ratio
-    avg_uptime = sum(sim.uptime_history) / len(sim.uptime_history) if sim.uptime_history else 1.0
     # Efficiency: penalty for excessive actions
     max_reasonable = sim.max_steps * 0.5
     efficiency = max(0.0, 1.0 - sim.actions_taken / max(1, max_reasonable))
     score = 0.50 * latency_score + 0.30 * avg_uptime + 0.20 * efficiency
-    return round(min(1.0, max(0.0, score)), 4)
 def _is_done_traffic_spike(env: "DistributedInfraEnvironment") -> bool:
@@ -64,6 +67,7 @@ def _is_done_traffic_spike(env: "DistributedInfraEnvironment") -> bool:
 # Task 2 — Medium: Single Node Failure
 # ============================================================================
 def _setup_node_failure(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """One node will fail at step 5. Agent must maintain 80%+ uptime."""
     sim = env.sim
@@ -82,7 +86,7 @@ def _grade_node_failure(env: "DistributedInfraEnvironment") -> float:
     sim = env.sim
     if not sim.uptime_history:
-        return 0.0
     # MTTR: how quickly system recovered from the failure
     failure_duration = 0
@@ -105,7 +109,7 @@ def _grade_node_failure(env: "DistributedInfraEnvironment") -> float:
     restart_penalty = max(0.0, 1.0 - max(0, sim.restart_count - 1) / 5)
     score = 0.40 * mttr_score + 0.40 * uptime_score + 0.20 * restart_penalty
-    return round(min(1.0, max(0.0, score)), 4)
 def _is_done_node_failure(env: "DistributedInfraEnvironment") -> bool:
@@ -125,6 +129,7 @@ def _is_done_node_failure(env: "DistributedInfraEnvironment") -> bool:
 # Task 3 — Hard: Cascading Failure Prevention
 # ============================================================================
 def _setup_cascading_failure(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """Two nodes near critical CPU. Agent must prevent cascade chain."""
     sim = env.sim
@@ -157,10 +162,7 @@ def _grade_cascading_failure(env: "DistributedInfraEnvironment") -> float:
     cascade_score = 1.0 if not sim.cascade_occurred else 0.3
     if sim.uptime_history:
-        healthy_now = sum(
-            1 for n in sim.nodes
-            if not n.is_failed and n.cpu_util < 0.85
-        )
         total_now = len(sim.nodes)
         cpu_score = healthy_now / total_now if total_now > 0 else 0.0
     else:
@@ -170,7 +172,7 @@ def _grade_cascading_failure(env: "DistributedInfraEnvironment") -> float:
     efficiency = max(0.0, 1.0 - sim.actions_taken / max(1, max_reasonable))
     score = 0.50 * cascade_score + 0.30 * cpu_score + 0.20 * efficiency
-    return round(min(1.0, max(0.0, score)), 4)
 def _is_done_cascading_failure(env: "DistributedInfraEnvironment") -> bool:
@@ -185,6 +187,7 @@ def _is_done_cascading_failure(env: "DistributedInfraEnvironment") -> bool:
 # Task 4 — Expert: Flash Crowd
 # ============================================================================
 def _setup_flash_crowd(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """Massive 5x traffic spike. Agent must scale up AND throttle to survive."""
     sim = env.sim
@@ -194,24 +197,30 @@ def _setup_flash_crowd(env: "DistributedInfraEnvironment", rng: "random.Random")
         node.cpu_util = 0.60 + rng.uniform(-0.05, 0.1)
         node.queue_length = rng.randint(15, 30)
 def _grade_flash_crowd(env: "DistributedInfraEnvironment") -> float:
     """
     Score = Survival Uptime (50%) + Latency control (50%).
     Cascade penalty applied if the system collapses.
     """
     sim = env.sim
-    avg_uptime = sum(sim.uptime_history) / len(sim.uptime_history) if sim.uptime_history else 0.0
     # Latency target is more generous for a massive flash crowd (100ms)
-    target = 100.0
     below_target = sum(1 for lat in sim.latency_history if lat < target)
-    latency_score = below_target / len(sim.latency_history) if sim.latency_history else 0.0
     cascade_penalty = 0.4 if sim.cascade_occurred else 0.0
     score = 0.50 * avg_uptime + 0.50 * latency_score - cascade_penalty
-    return round(min(1.0, max(0.0, score)), 4)
 def _is_done_flash_crowd(env: "DistributedInfraEnvironment") -> bool:
     failed_count = sum(1 for n in env.sim.nodes if n.is_failed)

 Each task provides:
     - setup(env, rng): configure initial node states and scenario parameters
+    - grade(env): return float in (0.0, 1.0) with partial credit (strictly between 0 and 1)
     - is_done(env): termination condition check
     - hint: natural language task description for the agent
 """
 # Task 1 — Easy: Traffic Spike Recovery
 # ============================================================================
 def _setup_traffic_spike(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """System receives 3x normal request rate."""
     sim = env.sim
     """
     sim = env.sim
     if not sim.latency_history:
+        return 0.01
     # Latency component: fraction of steps where latency was below target
     target = 50.0  # ms
     latency_score = below_target / len(sim.latency_history)
     # Uptime component: average uptime ratio
+    avg_uptime = (
+        sum(sim.uptime_history) / len(sim.uptime_history) if sim.uptime_history else 1.0
+    )
     # Efficiency: penalty for excessive actions
     max_reasonable = sim.max_steps * 0.5
     efficiency = max(0.0, 1.0 - sim.actions_taken / max(1, max_reasonable))
     score = 0.50 * latency_score + 0.30 * avg_uptime + 0.20 * efficiency
+    return round(min(0.99, max(0.01, score)), 4)
 def _is_done_traffic_spike(env: "DistributedInfraEnvironment") -> bool:
 # Task 2 — Medium: Single Node Failure
 # ============================================================================
 def _setup_node_failure(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """One node will fail at step 5. Agent must maintain 80%+ uptime."""
     sim = env.sim
     sim = env.sim
     if not sim.uptime_history:
+        return 0.01
     # MTTR: how quickly system recovered from the failure
     failure_duration = 0
     restart_penalty = max(0.0, 1.0 - max(0, sim.restart_count - 1) / 5)
     score = 0.40 * mttr_score + 0.40 * uptime_score + 0.20 * restart_penalty
+    return round(min(0.99, max(0.01, score)), 4)
 def _is_done_node_failure(env: "DistributedInfraEnvironment") -> bool:
 # Task 3 — Hard: Cascading Failure Prevention
 # ============================================================================
 def _setup_cascading_failure(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """Two nodes near critical CPU. Agent must prevent cascade chain."""
     sim = env.sim
     cascade_score = 1.0 if not sim.cascade_occurred else 0.3
     if sim.uptime_history:
+        healthy_now = sum(1 for n in sim.nodes if not n.is_failed and n.cpu_util < 0.85)
         total_now = len(sim.nodes)
         cpu_score = healthy_now / total_now if total_now > 0 else 0.0
     else:
     efficiency = max(0.0, 1.0 - sim.actions_taken / max(1, max_reasonable))
     score = 0.50 * cascade_score + 0.30 * cpu_score + 0.20 * efficiency
+    return round(min(0.99, max(0.01, score)), 4)
 def _is_done_cascading_failure(env: "DistributedInfraEnvironment") -> bool:
 # Task 4 — Expert: Flash Crowd
 # ============================================================================
 def _setup_flash_crowd(env: "DistributedInfraEnvironment", rng: "random.Random"):
     """Massive 5x traffic spike. Agent must scale up AND throttle to survive."""
     sim = env.sim
         node.cpu_util = 0.60 + rng.uniform(-0.05, 0.1)
         node.queue_length = rng.randint(15, 30)
 def _grade_flash_crowd(env: "DistributedInfraEnvironment") -> float:
     """
     Score = Survival Uptime (50%) + Latency control (50%).
     Cascade penalty applied if the system collapses.
     """
     sim = env.sim
+    avg_uptime = (
+        sum(sim.uptime_history) / len(sim.uptime_history) if sim.uptime_history else 0.0
+    )
     # Latency target is more generous for a massive flash crowd (100ms)
+    target = 100.0
     below_target = sum(1 for lat in sim.latency_history if lat < target)
+    latency_score = (
+        below_target / len(sim.latency_history) if sim.latency_history else 0.0
+    )
     cascade_penalty = 0.4 if sim.cascade_occurred else 0.0
     score = 0.50 * avg_uptime + 0.50 * latency_score - cascade_penalty
+    return round(min(0.99, max(0.01, score)), 4)
 def _is_done_flash_crowd(env: "DistributedInfraEnvironment") -> bool:
     failed_count = sum(1 for n in env.sim.nodes if n.is_failed)