fix(inference): refine scaling and rerouting rules and action behavior

- Updated task-2 and task-3 descriptions for clearer scaling and rerouting guidelines
- Modified SCALE_DOWN action to first cancel pending boots before removing active capacity
- Changed parsing of target_node_id and parameter to fall back to their defaults ("node-0" and 0.0) when the value is None or otherwise falsy
- Allowed NO_OP action to always succeed without target node validation
- Improved the simulator's SCALE_DOWN implementation to cancel pending capacity (newest boots first) before reducing active capacity
- Clarified cost tiers: pending capacity above the baseline is charged at the moderate (Tier 2) rate, alongside justified excess, and never at the idle-waste (Tier 3) rate
- Adjusted cost calculation for active and pending capacity to reflect tiered charging accurately

Files changed (4) hide show

control/validation.py +4 -0
inference.py +5 -5
server/AntiAtropos_environment.py +36 -16
simulator.py +18 -4

control/validation.py CHANGED Viewed

@@ -38,6 +38,10 @@ class ActionValidator:
         cooldown_penalty = 0.0
         if valid_targets is not None and target not in valid_targets:
             return False, f"Unknown target node: {target}", 0.0

         cooldown_penalty = 0.0
+        # NO_OP always succeeds — target and parameter don't matter
+        if action == "NO_OP":
+            return True, "Success", 0.0
         if valid_targets is not None and target not in valid_targets:
             return False, f"Unknown target node: {target}", 0.0

inference.py CHANGED Viewed

@@ -57,8 +57,8 @@ TEMPERATURE_SWEEP = [0.6, 0.3, 0.7]  # Fixed temperatures for multi-episode eval
 TASK_BRIEFS: Dict[str, str] = {
     "task-1": "Traffic ramps linearly every tick. Scale up proactively — new capacity takes 5 ticks to boot. Keep latency under SLA (200ms) while minimizing cost. Scale down when queues are safe.",
-    "task-2": "One node will fail permanently (any of node-1 through node-4, never node-0). STEP 1: scan all nodes and find which has status=FAILED and outflow=0. STEP 2: if the failed node has children, scale those children up (they are starved). STEP 3: reroute traffic from THE FAILED NODE (not its parent!) to healthy peers. If node-4 failed (independent), scale up node-0 to compensate.",
-    "task-3": "A surge (~75 req/tick) will hit node-1 and node-2 via a side channel bypassing node-0. Do NOT pre-scale — idle capacity costs money and there is no advance warning. Monitor queue_depth: ONLY scale node-1/node-2 when you SEE their queues climbing sharply. 3-4 SCALE_UPs on each is sufficient (5 replicas handles the burst at equilibrium). If queues don't drop after 4 SCALE_UPs, STOP scaling and try REROUTE instead. Scale down after queues return to safe levels to save cost.",
 }
 SYSTEM_PROMPT = textwrap.dedent(
@@ -74,7 +74,7 @@ SYSTEM_PROMPT = textwrap.dedent(
     ACTIONS (new capacity takes 5 ticks to boot):
       SCALE_UP <node> <amount>   — add capacity (0.3-0.5 normal, 0.6-0.8 heavy surge), clears DEGRADED
-      SCALE_DOWN <node> <amount>  — remove capacity (0.2-0.4 safe, 0.5-0.7 aggressive)
       REROUTE_TRAFFIC <node> <fraction> — reduce THIS node capacity, redistribute to peers (0.3-0.5)
       SHED_LOAD <node> <fraction>  — drop incoming traffic (0.3-0.5), NEVER on node-0 (payment gateway)
       NO_OP                           — do nothing
@@ -293,8 +293,8 @@ def _extract_json_object(text: str) -> dict:
 def _parse_action(payload: dict) -> SREAction:
     action_type = str(payload.get("action_type", "NO_OP")).upper()
-    target_node_id = str(payload.get("target_node_id", "node-0"))
-    parameter = float(payload.get("parameter", 0.0))
     return SREAction(
         action_type=ActionType(action_type),
         target_node_id=target_node_id,

 TASK_BRIEFS: Dict[str, str] = {
     "task-1": "Traffic ramps linearly every tick. Scale up proactively — new capacity takes 5 ticks to boot. Keep latency under SLA (200ms) while minimizing cost. Scale down when queues are safe.",
+    "task-2": "One node (node-1 through node-4) will fail permanently. Wait until you SEE a FAILED node — do NOT pre-scale. Once a node shows status=FAILED: reroute traffic FROM the failed node to healthy peers, and scale up any starved children. Do NOT scale node-0 unless node-4 failed independently. SCALE_DOWN cancels pending boots and reduces cost. If reward is falling, stop scaling.",
+    "task-3": "A surge (~75 req/tick) will hit node-1 and node-2 via a side channel bypassing node-0. Do NOT scale node-0 — it is NOT affected. ONLY scale node-1 or node-2 when their queue_depth rises. Do NOT pre-scale. 3-4 SCALE_UPs on each is sufficient. SCALE_DOWN cancels pending boots and reduces cost — use it when queues are safe. If reward is falling, STOP scaling and SCALE_DOWN to recover.",
 }
 SYSTEM_PROMPT = textwrap.dedent(
     ACTIONS (new capacity takes 5 ticks to boot):
       SCALE_UP <node> <amount>   — add capacity (0.3-0.5 normal, 0.6-0.8 heavy surge), clears DEGRADED
+      SCALE_DOWN <node> <amount>  — cancel pending boots first, then remove active capacity (0.2-0.4 safe, 0.5-0.7 aggressive)
       REROUTE_TRAFFIC <node> <fraction> — reduce THIS node capacity, redistribute to peers (0.3-0.5)
       SHED_LOAD <node> <fraction>  — drop incoming traffic (0.3-0.5), NEVER on node-0 (payment gateway)
       NO_OP                           — do nothing
 def _parse_action(payload: dict) -> SREAction:
     action_type = str(payload.get("action_type", "NO_OP")).upper()
+    target_node_id = str(payload.get("target_node_id") or "node-0")
+    parameter = float(payload.get("parameter") or 0.0)
     return SREAction(
         action_type=ActionType(action_type),
         target_node_id=target_node_id,

server/AntiAtropos_environment.py CHANGED Viewed

@@ -389,41 +389,61 @@ class AntiAtroposEnvironment(Environment):
         Tier 1 — Baseline capacity (up to DEFAULT_CAPACITY): cheap base rate.
             Infrastructure already provisioned and paid for — no penalty.
-        Tier 2 — Needed excess (above DEFAULT_CAPACITY, up to 'needed'): moderate
-            rate (4× base).  Agent added capacity that's actually serving traffic —
-            costs more but is justified by demand.
-        Tier 3 — Idle excess (above 'needed'): expensive penalty rate (20× base).
-            Capacity sitting idle beyond what traffic requires — pure waste.
-        'needed' = ceil(incoming_rate / 15) — minimum units to serve traffic.
-        With DEFAULT_CAPACITY=3, a node at baseline costs 3 × $0.05 = $0.15/hr
-        regardless of traffic.  Only scaling ABOVE baseline triggers higher rates,
-        giving the agent a clear gradient: scale just enough, not too much.
         """
         total_cost = 0.0
-        baseline_cap = int(DEFAULT_CAPACITY)  # Tier 1 ceiling (imported from simulator)
         for node in nodes_true:
             if node["status"] == NodeStatus.FAILED:
                 continue
-            capacity = int(node.get("capacity_units", 0)) + int(node.get("pending_capacity_units", 0))
             if capacity <= 0:
                 continue
             incoming = float(node.get("incoming_request_rate", 0.0))
             needed = max(1, int(math.ceil(incoming / 15.0)))
-            if capacity <= baseline_cap:
                 # Tier 1: baseline provisioned capacity — cheap base rate
-                total_cost += capacity * COST_PER_CAPACITY_UNIT_PER_HOUR
             else:
                 # Tier 1: baseline portion at cheap rate
                 total_cost += baseline_cap * COST_PER_CAPACITY_UNIT_PER_HOUR
-                above_baseline = capacity - baseline_cap
                 justified = max(0, needed - baseline_cap)  # excess that serves traffic
-                idle = max(0, above_baseline - justified)            # excess sitting idle (never negative)
                 # Tier 2: needed excess at moderate rate (4× base)
                 total_cost += justified * (COST_PER_CAPACITY_UNIT_PER_HOUR * 4.0)
                 # Tier 3: idle excess at penalty rate (20× base)
                 total_cost += idle * OVERPROVISION_COST_PER_UNIT
         return total_cost
     def _avg_latency(self, nodes: list[dict]) -> float:

         Tier 1 — Baseline capacity (up to DEFAULT_CAPACITY): cheap base rate.
             Infrastructure already provisioned and paid for — no penalty.
+        Tier 2 — Justified excess (above DEFAULT_CAPACITY, up to 'needed', or
+            pending/booting capacity): moderate rate (4× base).  Agent-added
+            capacity that's serving traffic OR in the boot queue — costs more
+            but is defensible.
+        Tier 3 — Idle excess (above 'needed', active only): expensive penalty
+            rate (20× base).  ACTIVE capacity sitting idle beyond what traffic
+            requires — pure waste.
+        Key: PENDING capacity is never charged at Tier 3 (idle waste).  Pending
+        units that fill baseline slots left unused by active capacity are charged
+        at the Tier 1 base rate; all remaining pending units are charged at Tier 2
+        (justified).  Pending units haven't booted yet so they CAN'T serve traffic;
+        classifying them as "idle waste" penalises the agent for the boot delay
+        which it cannot control.  Once they boot, they become active and are
+        reclassified as justified or idle based on actual traffic.
+        'needed' = ceil(incoming_rate / 15) — minimum ACTIVE units to serve traffic.
+        With DEFAULT_CAPACITY=3, a node at baseline costs 3 × $0.05 = $0.15/hr.
         """
         total_cost = 0.0
+        baseline_cap = int(DEFAULT_CAPACITY)  # Tier 1 ceiling
         for node in nodes_true:
             if node["status"] == NodeStatus.FAILED:
                 continue
+            active = int(node.get("capacity_units", 0))
+            pending = int(node.get("pending_capacity_units", 0))
+            capacity = active + pending
             if capacity <= 0:
                 continue
             incoming = float(node.get("incoming_request_rate", 0.0))
             needed = max(1, int(math.ceil(incoming / 15.0)))
+            # --- Active capacity ---
+            if active <= baseline_cap:
                 # Tier 1: baseline provisioned capacity — cheap base rate
+                total_cost += active * COST_PER_CAPACITY_UNIT_PER_HOUR
             else:
                 # Tier 1: baseline portion at cheap rate
                 total_cost += baseline_cap * COST_PER_CAPACITY_UNIT_PER_HOUR
+                above_baseline = active - baseline_cap
                 justified = max(0, needed - baseline_cap)  # excess that serves traffic
+                idle = max(0, above_baseline - justified)    # excess sitting idle
                 # Tier 2: needed excess at moderate rate (4× base)
                 total_cost += justified * (COST_PER_CAPACITY_UNIT_PER_HOUR * 4.0)
                 # Tier 3: idle excess at penalty rate (20× base)
                 total_cost += idle * OVERPROVISION_COST_PER_UNIT
+            # --- Pending capacity (Tier 1 within unused baseline, otherwise Tier 2 — never Tier 3) ---
+            if pending > 0:
+                # How much of the baseline budget is unused by active capacity?
+                baseline_remaining = max(0, baseline_cap - active)
+                # Pending fills remaining baseline slots first (Tier 1 rate)
+                pending_at_baseline = min(pending, baseline_remaining)
+                pending_above = pending - pending_at_baseline
+                total_cost += pending_at_baseline * COST_PER_CAPACITY_UNIT_PER_HOUR
+                total_cost += pending_above * (COST_PER_CAPACITY_UNIT_PER_HOUR * 4.0)  # Tier 2
         return total_cost
     def _avg_latency(self, nodes: list[dict]) -> float:

simulator.py CHANGED Viewed

@@ -282,12 +282,16 @@ class ClusterSimulator:
         node_id = action_model.target_node_id if hasattr(action_model, "target_node_id") else action_model["target_node_id"]
         param = action_model.parameter if hasattr(action_model, "parameter") else action_model["parameter"]
-        # 1. Target node lookup
         target = next((n for n in self._nodes if n.node_id == node_id), None)
         if not target:
             return False
-        # 2. Command implementation
         if at == "SCALE_UP":
             delta = max(1, int(param * MAX_SCALING_STEP))
             for _ in range(delta):
@@ -302,9 +306,19 @@ class ClusterSimulator:
         elif at == "SCALE_DOWN":
             delta = max(1, int(param * MAX_SCALING_STEP))
             old_capacity = target.capacity
-            target.capacity = max(1, target.capacity - delta)
-            return target.capacity != old_capacity  # Had effect if capacity actually changed
         elif at == "REROUTE_TRAFFIC":
             # Physically offload traffic FROM the target node by proportion `param`.

         node_id = action_model.target_node_id if hasattr(action_model, "target_node_id") else action_model["target_node_id"]
         param = action_model.parameter if hasattr(action_model, "parameter") else action_model["parameter"]
+        # 1. NO_OP always succeeds regardless of target node
+        if at == "NO_OP":
+            return True
+        # 2. Target node lookup (required for all other actions)
         target = next((n for n in self._nodes if n.node_id == node_id), None)
         if not target:
             return False
+        # 3. Command implementation
         if at == "SCALE_UP":
             delta = max(1, int(param * MAX_SCALING_STEP))
             for _ in range(delta):
         elif at == "SCALE_DOWN":
             delta = max(1, int(param * MAX_SCALING_STEP))
+            # First cancel any pending capacity (cancel boot queue entries)
+            # This is like canceling a VM launch — it hasn't served traffic yet.
+            cancelled = 0
+            while cancelled < delta and target.pending_capacity_queue:
+                target.pending_capacity_queue.pop()  # Remove newest pending first
+                cancelled += 1
+            # If still need to remove more, reduce active capacity
+            remaining = delta - cancelled
             old_capacity = target.capacity
+            if remaining > 0:
+                target.capacity = max(1, target.capacity - remaining)
+            # Had effect if we cancelled pending or reduced active capacity
+            return cancelled > 0 or target.capacity != old_capacity
         elif at == "REROUTE_TRAFFIC":
             # Physically offload traffic FROM the target node by proportion `param`.