model changes

Files changed (8) hide show

.env.example +3 -0
control/kubernetes_executor.py +136 -1
deploy/LOCAL_LAPTOP_FASTAPI_GUIDE.md +1 -1
deploy/aws/ARCHITECTURE.md +2 -2
deploy/do/antiatropos-pod-trim.sh +44 -0
deploy/do/deploy-droplet-one-shot.sh +13 -0
inference.py +12 -4
server/local_laptop_control.py +134 -3

.env.example CHANGED Viewed

@@ -22,6 +22,9 @@ ANTIATROPOS_MIN_REPLICAS=1
 ANTIATROPOS_MAX_REPLICAS=
 ANTIATROPOS_SCALE_STEP=3
 # Node -> deployment map used by Kubernetes executor
 ANTIATROPOS_WORKLOAD_MAP={"node-0":{"deployment":"payments","namespace":"prod-sre"},"node-1":{"deployment":"checkout","namespace":"prod-sre"},"node-2":{"deployment":"catalog","namespace":"prod-sre"},"node-3":{"deployment":"cart","namespace":"prod-sre"},"node-4":{"deployment":"auth","namespace":"prod-sre"}}

 ANTIATROPOS_MAX_REPLICAS=
 ANTIATROPOS_SCALE_STEP=3
+# Pod trim: auto-reset deployments to min_replicas and prune stale pods (every 30 min)
+ANTIATROPOS_TRIM_INTERVAL_S=1800
 # Node -> deployment map used by Kubernetes executor
 ANTIATROPOS_WORKLOAD_MAP={"node-0":{"deployment":"payments","namespace":"prod-sre"},"node-1":{"deployment":"checkout","namespace":"prod-sre"},"node-2":{"deployment":"catalog","namespace":"prod-sre"},"node-3":{"deployment":"cart","namespace":"prod-sre"},"node-4":{"deployment":"auth","namespace":"prod-sre"}}

control/kubernetes_executor.py CHANGED Viewed

@@ -30,7 +30,7 @@ class KubernetesExecutor:
         self.scale_step = int(os.getenv("ANTIATROPOS_SCALE_STEP", "3"))
         self._apps_v1_api = None
         self._node_workload_map = self._load_node_workload_map()
-        self._live_supported_actions = {"NO_OP", "SCALE_UP", "SCALE_DOWN"}
         self.k8s_retry_count = int(os.getenv("ANTIATROPOS_K8S_RETRY_COUNT", "2"))
         self.k8s_retry_backoff_s = float(os.getenv("ANTIATROPOS_K8S_RETRY_BACKOFF_S", "0.2"))
@@ -131,6 +131,12 @@ class KubernetesExecutor:
         if action in ("SCALE_UP", "SCALE_DOWN"):
             return self._scale_deployment(action, target, parameter)
         return f"Rejected: {action} is not enabled for live Kubernetes execution"
     def _mock_execution(self, action_type: str, target: str, parameter: float) -> str:
@@ -177,6 +183,135 @@ class KubernetesExecutor:
             f"in namespace {namespace} scaled {current}->{desired}"
         )
     def _patch_deployment_scale_with_retry(self, apps_v1, deployment_name: str, namespace: str, desired: int) -> None:
         """
         Patch deployment replicas with retries for transient API server errors.

         self.scale_step = int(os.getenv("ANTIATROPOS_SCALE_STEP", "3"))
         self._apps_v1_api = None
         self._node_workload_map = self._load_node_workload_map()
+        self._live_supported_actions = {"NO_OP", "SCALE_UP", "SCALE_DOWN", "REROUTE_TRAFFIC", "SHED_LOAD"}
         self.k8s_retry_count = int(os.getenv("ANTIATROPOS_K8S_RETRY_COUNT", "2"))
         self.k8s_retry_backoff_s = float(os.getenv("ANTIATROPOS_K8S_RETRY_BACKOFF_S", "0.2"))
         if action in ("SCALE_UP", "SCALE_DOWN"):
             return self._scale_deployment(action, target, parameter)
+        if action == "REROUTE_TRAFFIC":
+            return self._reroute_traffic(target, parameter)
+        if action == "SHED_LOAD":
+            return self._shed_load(target, parameter)
         return f"Rejected: {action} is not enabled for live Kubernetes execution"
     def _mock_execution(self, action_type: str, target: str, parameter: float) -> str:
             f"in namespace {namespace} scaled {current}->{desired}"
         )
+    def _reroute_traffic(self, target: str, parameter: float) -> str:
+        """
+        Live implementation of REROUTE_TRAFFIC.
+        Shifts capacity away from the target node onto its peers by:
+          1. Scaling DOWN the target deployment by parameter * current_replicas
+             (at least one replica is requested, but the result is never
+             below min_replicas).
+          2. Splitting the replicas that were actually removed across every
+             other deployment in the workload map as a SCALE_UP (best-effort,
+             capped at max_replicas). Peers are left untouched when the
+             target could not give anything up.
+        All mutations go through _patch_deployment_scale_with_retry.
+        Args:
+            target: node id of the workload to move capacity away from.
+            parameter: fraction of the target's replicas to move; clamped
+                to the range [0.0, 1.0].
+        Returns:
+            A human-readable "Ack: ..." summary of what was changed,
+            including any peers that had to be skipped.
+        """
+        namespace, deployment_name = self._resolve_workload_target(target)
+        apps_v1 = self._get_apps_v1_api()
+        scale_obj = apps_v1.read_namespaced_deployment_scale(
+            name=deployment_name,
+            namespace=namespace,
+        )
+        current_target = int(scale_obj.spec.replicas or self.min_replicas)
+        frac = min(1.0, max(0.0, float(parameter)))
+        # Always request at least one replica so the action is observable.
+        requested = max(1, int(current_target * frac))
+        new_target = max(self.min_replicas, current_target - requested)
+        # Only replicas that really leave the target may be handed to peers;
+        # the min_replicas clamp can make this smaller than `requested`.
+        shed = max(0, current_target - new_target)
+        messages: list[str] = []
+        if new_target != current_target:
+            self._patch_deployment_scale_with_retry(
+                apps_v1=apps_v1,
+                deployment_name=deployment_name,
+                namespace=namespace,
+                desired=new_target,
+            )
+            messages.append(
+                f"target {deployment_name} scaled {current_target}->{new_target}"
+            )
+        else:
+            messages.append(
+                f"target {deployment_name} unchanged at {current_target} (already at min)"
+            )
+        # Every other mapped workload is a candidate; health is not checked here.
+        peers = [
+            (peer_id, peer_info)
+            for peer_id, peer_info in self._node_workload_map.items()
+            if peer_id != target
+        ]
+        if peers and shed > 0:
+            # Spread the shed replicas as evenly as possible: the first
+            # `extra` peers receive one more than the rest.
+            base, extra = divmod(shed, len(peers))
+            added = 0
+            scaled_peers = 0
+            for index, (peer_id, peer_info) in enumerate(peers):
+                peer_delta = base + (1 if index < extra else 0)
+                if peer_delta == 0:
+                    continue
+                try:
+                    peer_deployment = peer_info["deployment"]
+                    peer_ns = peer_info.get("namespace", self.namespace)
+                    peer_scale = apps_v1.read_namespaced_deployment_scale(
+                        name=peer_deployment, namespace=peer_ns,
+                    )
+                    peer_current = int(peer_scale.spec.replicas or self.min_replicas)
+                    peer_desired = peer_current + peer_delta
+                    if self.max_replicas is not None:
+                        peer_desired = min(self.max_replicas, peer_desired)
+                    # A reroute must never shrink a peer, even one above the cap.
+                    if peer_desired > peer_current:
+                        self._patch_deployment_scale_with_retry(
+                            apps_v1=apps_v1,
+                            deployment_name=peer_deployment,
+                            namespace=peer_ns,
+                            desired=peer_desired,
+                        )
+                        added += peer_desired - peer_current
+                        scaled_peers += 1
+                except Exception as exc:
+                    # Best-effort for peers: keep going, but surface the failure.
+                    messages.append(f"peer {peer_id} skipped: {exc}")
+            if scaled_peers:
+                messages.append(
+                    f"redistributed +{added} replica(s) across {scaled_peers} peer(s)"
+                )
+        return (
+            f"Ack: REROUTE_TRAFFIC for {target} (frac={frac:.2f}) - "
+            + "; ".join(messages)
+        )
+    def _shed_load(self, target: str, parameter: float) -> str:
+        """
+        Live implementation of SHED_LOAD.
+        Drops a fraction of capacity from the target node by scaling DOWN
+        its deployment. The replica reduction made here is not reverted by
+        this method; it stays until something scales the deployment back up.
+        NOTE(review): the original author states that critical nodes
+        (node-0, node-1, node-2) are rejected by validation before this
+        method is called. That validation is not visible here - confirm.
+        Args:
+            target: node id of the workload to shed capacity from.
+            parameter: fraction of current replicas to drop; clamped to
+                the range [0.0, 1.0]. At least one replica is requested.
+        Returns:
+            A human-readable "Ack: ..." summary, reporting the number of
+            replicas actually removed after clamping to min_replicas.
+        """
+        namespace, deployment_name = self._resolve_workload_target(target)
+        apps_v1 = self._get_apps_v1_api()
+        scale_obj = apps_v1.read_namespaced_deployment_scale(
+            name=deployment_name,
+            namespace=namespace,
+        )
+        current = int(scale_obj.spec.replicas or self.min_replicas)
+        frac = min(1.0, max(0.0, float(parameter)))
+        # Always request at least one replica so the action is observable.
+        requested = max(1, int(current * frac))
+        desired = max(self.min_replicas, current - requested)
+        if desired == current:
+            return (
+                f"Ack: SHED_LOAD for {target} - replicas unchanged at {current} "
+                f"(already at min_replicas={self.min_replicas})"
+            )
+        self._patch_deployment_scale_with_retry(
+            apps_v1=apps_v1,
+            deployment_name=deployment_name,
+            namespace=namespace,
+            desired=desired,
+        )
+        # Report what was really removed: the min_replicas clamp can make
+        # this smaller than the requested amount.
+        shed = max(0, current - desired)
+        return (
+            f"Ack: SHED_LOAD for {target} - deployment {deployment_name} "
+            f"in namespace {namespace} scaled {current}->{desired} "
+            f"(shed {shed} replicas, frac={frac:.2f})"
+        )
     def _patch_deployment_scale_with_retry(self, apps_v1, deployment_name: str, namespace: str, desired: int) -> None:
         """
         Patch deployment replicas with retries for transient API server errors.

deploy/LOCAL_LAPTOP_FASTAPI_GUIDE.md CHANGED Viewed

@@ -50,7 +50,7 @@ Check:
 ## 5) Let your agent execute actions
 The server accepts `POST /step` with:
-- `action_type`: `NO_OP` | `SCALE_UP` | `SCALE_DOWN`
 - `target_node_id`: `node-*`
 - `parameter`: float

 ## 5) Let your agent execute actions
 The server accepts `POST /step` with:
+- `action_type`: `NO_OP` | `SCALE_UP` | `SCALE_DOWN` | `REROUTE_TRAFFIC` | `SHED_LOAD`
 - `target_node_id`: `node-*`
 - `parameter`: float

deploy/aws/ARCHITECTURE.md CHANGED Viewed

@@ -106,8 +106,8 @@ Every "tick" (one step of the simulation), the agent goes through this cycle:
 |---|---|---|
 | `SCALE_UP` | "node-0 needs more capacity" | `KubernetesExecutor` patches `payments` Deployment: `replicas: 2 -> 5` |
 | `SCALE_DOWN` | "node-3 is over-provisioned" | `KubernetesExecutor` patches `cart` Deployment: `replicas: 4 -> 1` |
-| `REROUTE_TRAFFIC` | "Move traffic away from node-2" | Currently simulation-only (no live K8s ingress patching) |
-| `SHED_LOAD` | "Drop 50% of traffic to node-3" | Currently simulation-only (no live K8s traffic shaping) |
 | `NO_OP` | "Do nothing this tick" | Nothing changes on EKS |
 ### The SCALE_UP Flow in Detail

 |---|---|---|
 | `SCALE_UP` | "node-0 needs more capacity" | `KubernetesExecutor` patches `payments` Deployment: `replicas: 2 -> 5` |
 | `SCALE_DOWN` | "node-3 is over-provisioned" | `KubernetesExecutor` patches `cart` Deployment: `replicas: 4 -> 1` |
+| `REROUTE_TRAFFIC` | "Move traffic away from node-2" | `KubernetesExecutor` scales DOWN the target deployment (never below `min_replicas`) and scales UP the other mapped deployments (best-effort, capped at `max_replicas`) |
+| `SHED_LOAD` | "Drop 50% of traffic to node-3" | `KubernetesExecutor` scales DOWN the target deployment by `parameter * current_replicas` (at least 1 replica, never below `min_replicas`) |
 | `NO_OP` | "Do nothing this tick" | Nothing changes on EKS |
 ### The SCALE_UP Flow in Detail

deploy/do/antiatropos-pod-trim.sh ADDED Viewed

	@@ -0,0 +1,44 @@

+#!/usr/bin/env bash
+# antiatropos-pod-trim.sh
+# Resets all deployments in a namespace (default: prod-sre) to a minimum
+# replica count AND deletes completed/failed/evicted pods to prevent accumulation.
+# Installed as a cron job to prevent pod stacking across episodes.
+# Usage: antiatropos-pod-trim.sh [NAMESPACE] [MIN_REPLICAS]
+set -euo pipefail
+KUBECONFIG="${KUBECONFIG:-/etc/rancher/k3s/k3s.yaml}"
+export KUBECONFIG
+NAMESPACE="${1:-prod-sre}"
+MIN_REPLICAS="${2:-1}"
+# jsonpath template that prints ONE resource name per line. `read -r` consumes
+# whole lines, so the space-separated `{.items[*].metadata.name}` form would
+# arrive as a single unterminated line and the loop bodies would never run.
+NAMES_PER_LINE='{range .items[*]}{.metadata.name}{"\n"}{end}'
+trimmed=0
+while IFS= read -r deploy; do
+    [[ -z "$deploy" ]] && continue
+    current=$(kubectl get deploy "$deploy" -n "$NAMESPACE" -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "0")
+    if [[ "$current" -gt "$MIN_REPLICAS" ]]; then
+        # Count only successful scale-downs; one failure must not abort the
+        # whole run under `set -e`.
+        if kubectl scale deploy "$deploy" -n "$NAMESPACE" --replicas="$MIN_REPLICAS" >/dev/null 2>&1; then
+            trimmed=$((trimmed + 1))
+        else
+            echo "$(date -Iseconds) WARN: failed to scale $deploy in $NAMESPACE" >&2
+        fi
+    fi
+done < <(kubectl get deploy -n "$NAMESPACE" -o jsonpath="$NAMES_PER_LINE" 2>/dev/null)
+# Delete completed (Succeeded), failed, and evicted pods across the namespace.
+# These accumulate across episodes and can exhaust node resources
+# even after deployments are scaled back down.
+deleted=0
+# Force-delete every pod named on stdin (one name per line), counting successes.
+delete_pods() {
+    local pod
+    while IFS= read -r pod; do
+        [[ -z "$pod" ]] && continue
+        if kubectl delete pod "$pod" -n "$NAMESPACE" --force --grace-period=0 >/dev/null 2>&1; then
+            deleted=$((deleted + 1))
+        fi
+    done
+}
+for phase in Succeeded Failed; do
+    delete_pods < <(kubectl get pods -n "$NAMESPACE" --field-selector="status.phase=$phase" -o jsonpath="$NAMES_PER_LINE" 2>/dev/null)
+done
+# Also nuke evicted pods (reason=Evicted, phase=Failed is often covered
+# above, but some k3s versions keep evicted pods in a weird state).
+delete_pods < <(kubectl get pods -n "$NAMESPACE" -o jsonpath='{range .items[?(@.status.reason=="Evicted")]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)
+if [[ "$trimmed" -gt 0 || "$deleted" -gt 0 ]]; then
+    echo "$(date -Iseconds) Trimmed $trimmed deployments to $MIN_REPLICAS replicas, deleted $deleted stale pods in $NAMESPACE"
+fi

deploy/do/deploy-droplet-one-shot.sh CHANGED Viewed

@@ -103,6 +103,7 @@ ANTIATROPOS_K8S_NAMESPACE=prod-sre
 ANTIATROPOS_MIN_REPLICAS=${MIN_REPLICAS}
 ANTIATROPOS_MAX_REPLICAS=${MAX_REPLICAS}
 ANTIATROPOS_SCALE_STEP=${SCALE_STEP}
 ANTIATROPOS_WORKLOAD_MAP=${WORKLOAD_MAP}
 EOF
   echo "Created ${ENV_FILE}"
@@ -149,6 +150,18 @@ EOF
 systemctl daemon-reload
 systemctl enable --now antiatropos-control
 echo ""
 echo "Waiting for control API readiness..."
 for _ in {1..30}; do

 ANTIATROPOS_MIN_REPLICAS=${MIN_REPLICAS}
 ANTIATROPOS_MAX_REPLICAS=${MAX_REPLICAS}
 ANTIATROPOS_SCALE_STEP=${SCALE_STEP}
+ANTIATROPOS_TRIM_INTERVAL_S=1800
 ANTIATROPOS_WORKLOAD_MAP=${WORKLOAD_MAP}
 EOF
   echo "Created ${ENV_FILE}"
 systemctl daemon-reload
 systemctl enable --now antiatropos-control
+# --- Pod trim cron: resets prod-sre deployments to min replicas every 30 min ---
+TRIM_SCRIPT="/usr/local/bin/antiatropos-pod-trim.sh"
+if [[ -f "${REPO_DIR}/deploy/do/antiatropos-pod-trim.sh" ]]; then
+  cp "${REPO_DIR}/deploy/do/antiatropos-pod-trim.sh" "${TRIM_SCRIPT}"
+  chmod +x "${TRIM_SCRIPT}"
+  (crontab -l 2>/dev/null | grep -v 'antiatropos-pod-trim'; echo "*/30 * * * * KUBECONFIG=${KUBECONFIG_PATH} ${TRIM_SCRIPT} ${K8S_NAMESPACE} ${MIN_REPLICAS} >> /var/log/antiatropos-trim.log 2>&1") | crontab -
+  echo "Pod trim cron installed: every 30 min, resets ${K8S_NAMESPACE} deployments to ${MIN_REPLICAS} replicas + prunes stale pods"
+  echo "  Log: /var/log/antiatropos-trim.log"
+else
+  echo "WARNING: antiatropos-pod-trim.sh not found; skipping cron setup"
+fi
 echo ""
 echo "Waiting for control API readiness..."
 for _ in {1..30}; do

inference.py CHANGED Viewed

@@ -52,7 +52,7 @@ MAX_TOKENS = int(os.getenv("ANTIATROPOS_MAX_TOKENS", "180"))
 SEED = int(os.getenv("ANTIATROPOS_SEED", "42"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("ANTIATROPOS_SUCCESS_THRESHOLD", "0.55"))
 EVAL_RUNS = int(os.getenv("ANTIATROPOS_EVAL_RUNS", "3"))  # Num eval runs per task
-TEMPERATURE_SWEEP = [0.0, 0.3, 0.7]  # Fixed temperatures for multi-episode eval
 TASK_BRIEFS: Dict[str, str] = {
     "task-1": "Traffic increases linearly. Scale proactively to keep latency low and cost efficient.",
@@ -62,7 +62,9 @@ TASK_BRIEFS: Dict[str, str] = {
 SYSTEM_PROMPT = textwrap.dedent(
     """
-    You are an autonomous SRE controller managing a ten-node microservice cluster.
     Return exactly one JSON object:
     {
@@ -178,6 +180,14 @@ def build_user_prompt(task_id: str, step: int, obs: dict, history: List[str], de
 def observation_for_model(obs) -> dict:
     return {
         "task_id": obs.task_id,
         "mode": getattr(obs.mode, "value", str(obs.mode)),
@@ -197,8 +207,6 @@ def observation_for_model(obs) -> dict:
             {
                 "node_id": node.node_id,
                 "status": getattr(node.status, "value", str(node.status)),
-                "is_vip": node.is_vip,
-                "importance_weight": node.importance_weight,
                 "queue_depth": node.queue_depth,
                 "latency_ms": node.latency_ms,
                 "incoming_request_rate": node.incoming_request_rate,

 SEED = int(os.getenv("ANTIATROPOS_SEED", "42"))
 SUCCESS_SCORE_THRESHOLD = float(os.getenv("ANTIATROPOS_SUCCESS_THRESHOLD", "0.55"))
 EVAL_RUNS = int(os.getenv("ANTIATROPOS_EVAL_RUNS", "3"))  # Num eval runs per task
+TEMPERATURE_SWEEP = [0.7, 0.3, 0.7]  # Fixed temperatures for multi-episode eval
 TASK_BRIEFS: Dict[str, str] = {
     "task-1": "Traffic increases linearly. Scale proactively to keep latency low and cost efficient.",
 SYSTEM_PROMPT = textwrap.dedent(
     """
+    You are an autonomous SRE controller managing a five-node microservice cluster.
+    node-0 is the payment gateway (higher business priority, receives 2x reward weight).
+    Balance protection of node-0 with the health of all other nodes — do not ignore nodes 1-4.
     Return exactly one JSON object:
     {
 def observation_for_model(obs) -> dict:
+    """
+    Build a compact observation dict for the LLM.
+    IMPORTANT: is_vip and importance_weight are deliberately EXCLUDED.
+    The LLM must learn which nodes matter from rewards alone, not from
+    explicit bias signals in the observation.  Including these fields
+    caused the model to fixate on node-0 and ignore nodes 1-4.
+    """
     return {
         "task_id": obs.task_id,
         "mode": getattr(obs.mode, "value", str(obs.mode)),
             {
                 "node_id": node.node_id,
                 "status": getattr(node.status, "value", str(node.status)),
                 "queue_depth": node.queue_depth,
                 "latency_ms": node.latency_ms,
                 "incoming_request_rate": node.incoming_request_rate,

server/local_laptop_control.py CHANGED Viewed

@@ -3,7 +3,7 @@ Lightweight FastAPI control plane for local laptop Kubernetes testing.
 Purpose:
 - Accept simple SRE actions over HTTP
-- Execute SCALE_UP / SCALE_DOWN / NO_OP against local deployments
 - Keep a minimal in-memory action history for debugging
 Run:
@@ -12,12 +12,16 @@ Run:
 from __future__ import annotations
 from datetime import datetime, timezone
 from typing import Any
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field
 try:
     from ..control import KubernetesExecutor
 except (ImportError, ModuleNotFoundError):
@@ -25,7 +29,7 @@ except (ImportError, ModuleNotFoundError):
 class ActionRequest(BaseModel):
-    action_type: str = Field(description="NO_OP | SCALE_UP | SCALE_DOWN")
     target_node_id: str = Field(description="node-0 .. node-9")
     parameter: float = Field(default=0.0, ge=0.0, le=10.0)
@@ -50,9 +54,111 @@ STATE: dict[str, Any] = {
     "step_count": 0,
     "last_action": None,
     "history": [],
 }
-_ALLOWED_ACTIONS = {"NO_OP", "SCALE_UP", "SCALE_DOWN"}
 def _now_utc_iso() -> str:
@@ -68,6 +174,7 @@ def health() -> dict[str, Any]:
         "kubeconfig": executor.kubeconfig,
         "mapped_targets": sorted(list(executor._node_workload_map.keys())),
         "allowed_actions": sorted(list(_ALLOWED_ACTIONS)),
     }
@@ -85,10 +192,34 @@ def state() -> dict[str, Any]:
         "step_count": STATE["step_count"],
         "last_action": STATE["last_action"],
         "history_size": len(STATE["history"]),
         "is_mock": executor.is_mock,
     }
 @app.post("/step", response_model=ActionResponse)
 def step(action: ActionRequest) -> ActionResponse:
     if executor.is_mock:

 Purpose:
 - Accept simple SRE actions over HTTP
+- Execute SCALE_UP / SCALE_DOWN / REROUTE_TRAFFIC / SHED_LOAD / NO_OP against local deployments
 - Keep a minimal in-memory action history for debugging
 Run:
 from __future__ import annotations
+import subprocess
+import threading
 from datetime import datetime, timezone
 from typing import Any
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field
+import os
 try:
     from ..control import KubernetesExecutor
 except (ImportError, ModuleNotFoundError):
 class ActionRequest(BaseModel):
+    action_type: str = Field(description="NO_OP | SCALE_UP | SCALE_DOWN | REROUTE_TRAFFIC | SHED_LOAD")
     target_node_id: str = Field(description="node-0 .. node-9")
     parameter: float = Field(default=0.0, ge=0.0, le=10.0)
     "step_count": 0,
     "last_action": None,
     "history": [],
+    "last_trim": None,
 }
+# Action names advertised as "allowed_actions" by the health endpoint.
+_ALLOWED_ACTIONS = {"NO_OP", "SCALE_UP", "SCALE_DOWN", "REROUTE_TRAFFIC", "SHED_LOAD"}
+# Background trim interval (seconds). Default 30 minutes.
+# Read once at import time from ANTIATROPOS_TRIM_INTERVAL_S; the background
+# trim thread sleeps this long between runs.
+TRIM_INTERVAL_S = int(os.getenv("ANTIATROPOS_TRIM_INTERVAL_S", "1800"))
+def _run_kubectl_trim() -> dict[str, Any]:
+    """
+    Run the pod-trim logic inline via kubectl subprocess calls.
+    Scales every deployment in the executor's namespace that is above
+    min_replicas back down to min_replicas, then force-deletes
+    Succeeded / Failed / evicted pods.
+    Only kubectl calls that exit successfully are counted; every failure
+    (spawn error, timeout, or non-zero exit) is recorded in "errors"
+    and the run continues.
+    Returns:
+        Summary dict with keys "namespace", "min_replicas",
+        "deployments_scaled", "pods_deleted" and "errors".
+    """
+    ns = executor.namespace
+    min_r = executor.min_replicas
+    kubeconfig = executor.kubeconfig
+    result: dict[str, Any] = {
+        "namespace": ns,
+        "min_replicas": min_r,
+        "deployments_scaled": 0,
+        "pods_deleted": 0,
+        "errors": [],
+    }
+    # Point kubectl at the executor's kubeconfig unless it is unset or "mock";
+    # env=None makes the child inherit this process's environment unchanged.
+    env = None
+    if kubeconfig and kubeconfig.lower() not in ("mock", ""):
+        env = {**os.environ, "KUBECONFIG": kubeconfig}
+    def _kubectl(args: list[str]) -> str | None:
+        """Run kubectl; return stripped stdout, or None after recording the failure."""
+        try:
+            proc = subprocess.run(
+                ["kubectl", *args],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                env=env,
+            )
+        except Exception as exc:
+            result["errors"].append(str(exc))
+            return None
+        if proc.returncode != 0:
+            detail = proc.stderr.strip() or f"exit code {proc.returncode}"
+            result["errors"].append(f"kubectl {' '.join(args)}: {detail}")
+            return None
+        return proc.stdout.strip()
+    def _delete_pods(names: str | None) -> None:
+        """Force-delete each whitespace-separated pod name, counting successes."""
+        for pod in (names or "").split():
+            deleted = _kubectl(["delete", "pod", pod, "-n", ns, "--force", "--grace-period=0"])
+            if deleted is not None:
+                result["pods_deleted"] += 1
+    # Scale deployments back to min_replicas
+    deploys = _kubectl(["get", "deploy", "-n", ns, "-o", "jsonpath={.items[*].metadata.name}"])
+    for name in (deploys or "").split():
+        cur = _kubectl(["get", "deploy", name, "-n", ns, "-o", "jsonpath={.spec.replicas}"])
+        try:
+            cur_r = int(cur or "")
+        except ValueError:
+            # Replica count unavailable or not numeric: leave this deployment alone.
+            continue
+        if cur_r > min_r:
+            scaled = _kubectl(["scale", "deploy", name, "-n", ns, "--replicas", str(min_r)])
+            if scaled is not None:
+                result["deployments_scaled"] += 1
+    # Delete completed and failed pods
+    for phase in ("Succeeded", "Failed"):
+        _delete_pods(_kubectl([
+            "get", "pods", "-n", ns,
+            "--field-selector", f"status.phase={phase}",
+            "-o", "jsonpath={.items[*].metadata.name}",
+        ]))
+    # Delete evicted pods (some k3s versions don't surface these as Failed)
+    _delete_pods(_kubectl([
+        "get", "pods", "-n", ns, "-o",
+        'jsonpath={range .items[?(@.status.reason=="Evicted")]}{.metadata.name}{" "}{end}',
+    ]))
+    return result
+def _periodic_trim() -> None:
+    """
+    Background thread body: trim pods every TRIM_INTERVAL_S seconds.
+    Each completed run (or its failure) is stored in STATE["last_trim"],
+    in the same shape the /trim endpoint uses, so /state also reflects
+    background trims. A non-positive interval disables periodic trimming
+    instead of busy-looping or crashing in sleep().
+    """
+    import time as _time
+    if TRIM_INTERVAL_S <= 0:
+        return
+    while True:
+        _time.sleep(TRIM_INTERVAL_S)
+        try:
+            if executor.is_mock:
+                continue
+            outcome = _run_kubectl_trim()
+        except Exception as exc:
+            # Best-effort: keep the thread alive and retry next cycle, but
+            # leave a trace of the failure instead of dropping it.
+            outcome = {"errors": [str(exc)]}
+        STATE["last_trim"] = {**outcome, "timestamp_utc": _now_utc_iso()}
+@app.on_event("startup")
+def _start_trim_thread() -> None:
+    """Launch the daemon pod-trim worker when FastAPI starts (skipped in mock mode)."""
+    if executor.is_mock:
+        return
+    worker = threading.Thread(name="pod-trim", target=_periodic_trim, daemon=True)
+    worker.start()
 def _now_utc_iso() -> str:
         "kubeconfig": executor.kubeconfig,
         "mapped_targets": sorted(list(executor._node_workload_map.keys())),
         "allowed_actions": sorted(list(_ALLOWED_ACTIONS)),
+        "trim_interval_s": TRIM_INTERVAL_S if not executor.is_mock else None,
     }
         "step_count": STATE["step_count"],
         "last_action": STATE["last_action"],
         "history_size": len(STATE["history"]),
+        "last_trim": STATE["last_trim"],
         "is_mock": executor.is_mock,
     }
+@app.post("/trim")
+def trim() -> dict[str, Any]:
+    """
+    Trim on request: reset deployments to min_replicas and remove
+    completed / failed / evicted pods.
+    Responds 400 when the executor is in mock mode and 500 when the trim
+    raises. On success the timestamped summary is stored in
+    STATE["last_trim"] and returned.
+    """
+    if executor.is_mock:
+        raise HTTPException(
+            status_code=400,
+            detail="KubernetesExecutor is in mock mode. Set KUBECONFIG to enable trimming.",
+        )
+    try:
+        summary = _run_kubectl_trim()
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Trim failed: {exc}") from exc
+    snapshot = {**summary, "timestamp_utc": _now_utc_iso()}
+    STATE["last_trim"] = snapshot
+    return snapshot
 @app.post("/step", response_model=ActionResponse)
 def step(action: ActionRequest) -> ActionResponse:
     if executor.is_mock: