"""Task: Kubernetes Pod Failures — MEDIUM.

Agent fixes common pod failure scenarios: OOMKilled, ImagePullBackOff,
wrong command, missing ConfigMap, liveness probe.
"""

from server.models import TaskDifficulty
from server.tasks.base import BaseTask


class K8sPodTask(BaseTask):
    """Catalogue of broken-pod scenarios an agent has to repair.

    Each entry in ``SCENARIOS`` is a plain dict with:

    * ``id`` -- short scenario identifier.
    * ``files`` -- the starting workspace; each file has ``path``, ``type``
      and ``content``.
    * ``error`` -- the ``phase`` plus the kubectl transcript (``message``)
      shown for the failure.
    * ``expected_fixes`` -- checks of ``type`` ``"contains"``: the named
      ``file`` is expected to contain the ``expected`` substring; ``hint``
      explains the fix in prose.

    NOTE(review): how these attributes are consumed is defined by
    ``BaseTask`` and the grader, which are not visible from this module --
    confirm there before changing key names or check types.
    """

    NAME = "Kubernetes Pod Failures"
    DESCRIPTION = (
        "Fix Kubernetes pod failures including CrashLoopBackOff, "
        "ImagePullBackOff, and resource issues"
    )
    DIFFICULTY = TaskDifficulty.MEDIUM
    AVAILABLE_SECRETS = []

    # Manifest and source-file contents below carry their real nesting
    # indentation: YAML and Python are whitespace-sensitive, so flattening
    # them changes (or destroys) their meaning.
    SCENARIOS = [
        # Scenario 1: CrashLoopBackOff — OOMKilled (memory limit too low)
        {
            "id": "oom_killed",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: api-server\n"
                        "spec:\n"
                        "  replicas: 3\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: api\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: api\n"
                        "    spec:\n"
                        "      containers:\n"
                        "      - name: api\n"
                        "        image: myapp:v1.2.3\n"
                        "        resources:\n"
                        "          limits:\n"
                        '            memory: "64Mi"\n'
                        '            cpu: "100m"\n'
                        "        ports:\n"
                        "        - containerPort: 8080\n"
                    ),
                }
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "api-server-7d4b8c9f5-x2k9m 0/1 CrashLoopBackOff 5 3m\n"
                    "\n"
                    "$ kubectl describe pod api-server-7d4b8c9f5-x2k9m\n"
                    "...\n"
                    "State: Waiting\n"
                    " Reason: CrashLoopBackOff\n"
                    "Last State: Terminated\n"
                    " Reason: OOMKilled\n"
                    " Exit Code: 137\n"
                    "...\n"
                    "Events:\n"
                    " Warning OOMKilling 3m kubelet Memory limit 64Mi exceeded"
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": 'memory: "256Mi"',
                    "hint": (
                        "Container is OOMKilled with 64Mi limit. "
                        "The app needs at least 256Mi."
                    ),
                }
            ],
        },
        # Scenario 2: ImagePullBackOff — image tag typo
        {
            "id": "image_pull_backoff",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: web-app\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: web\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: web\n"
                        "    spec:\n"
                        "      containers:\n"
                        "      - name: web\n"
                        "        image: nginx:latset\n"
                        "        ports:\n"
                        "        - containerPort: 80\n"
                    ),
                }
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "web-app-5f8d7b6c4-abc12 0/1 ImagePullBackOff 0 2m\n"
                    "\n"
                    "$ kubectl describe pod web-app-5f8d7b6c4-abc12\n"
                    "...\n"
                    "Events:\n"
                    ' Warning Failed 2m kubelet Failed to pull image "nginx:latset": '
                    "rpc error: code = NotFound desc = failed to pull and unpack image: "
                    "reference not found\n"
                    " Warning Failed 2m kubelet Error: ImagePullBackOff\n"
                    "..."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": "image: nginx:latest",
                    "hint": "Image tag has a typo: 'latset' should be 'latest'",
                }
            ],
        },
        # Scenario 3: CrashLoopBackOff — wrong command
        {
            "id": "wrong_command",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: worker\n"
                        "spec:\n"
                        "  replicas: 1\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: worker\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: worker\n"
                        "    spec:\n"
                        "      containers:\n"
                        "      - name: worker\n"
                        "        image: python:3.11-slim\n"
                        '        command: ["python", "workers.py"]\n'
                        "        resources:\n"
                        "          limits:\n"
                        '            memory: "512Mi"\n'
                        '            cpu: "500m"\n'
                    ),
                },
                {
                    "path": "app/worker.py",
                    "type": "other",
                    "content": (
                        "import time\n"
                        "\n"
                        "def main():\n"
                        "    while True:\n"
                        "        print('Processing...')\n"
                        "        time.sleep(5)\n"
                        "\n"
                        "if __name__ == '__main__':\n"
                        "    main()\n"
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "worker-6b8f9d7c4-kj3m2 0/1 CrashLoopBackOff 4 2m\n"
                    "\n"
                    "$ kubectl logs worker-6b8f9d7c4-kj3m2\n"
                    "python: can't open file '/workers.py': "
                    "[Errno 2] No such file or directory\n"
                    "\n"
                    "$ kubectl describe pod worker-6b8f9d7c4-kj3m2\n"
                    "...\n"
                    "State: Waiting\n"
                    " Reason: CrashLoopBackOff\n"
                    "Last State: Terminated\n"
                    " Reason: Error\n"
                    " Exit Code: 2\n"
                    "..."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": 'command: ["python", "worker.py"]',
                    "hint": (
                        "The command references 'workers.py' but the file "
                        "is named 'worker.py' (no 's')"
                    ),
                }
            ],
        },
        # Scenario 4: CreateContainerConfigError — missing ConfigMap
        {
            "id": "missing_configmap",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: backend\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: backend\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: backend\n"
                        "    spec:\n"
                        "      containers:\n"
                        "      - name: backend\n"
                        "        image: mybackend:v2.0\n"
                        "        ports:\n"
                        "        - containerPort: 8080\n"
                        "        envFrom:\n"
                        "        - configMapRef:\n"
                        "            name: app-config\n"
                        "        resources:\n"
                        "          limits:\n"
                        '            memory: "512Mi"\n'
                        '            cpu: "500m"\n'
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "backend-5c9d8f7b6-lm4n5 0/1 CreateContainerConfigError 0 1m\n"
                    "\n"
                    "$ kubectl describe pod backend-5c9d8f7b6-lm4n5\n"
                    "...\n"
                    "Events:\n"
                    ' Warning Failed 1m kubelet Error: configmap "app-config" not found\n'
                    "..."
                ),
            },
            # The fix lives in a file that does not exist in the starting
            # workspace: the agent has to create k8s/configmap.yaml.
            "expected_fixes": [
                {
                    "file": "k8s/configmap.yaml",
                    "type": "contains",
                    "expected": "name: app-config",
                    "hint": (
                        "The ConfigMap 'app-config' is referenced but "
                        "doesn't exist. Create a ConfigMap manifest."
                    ),
                }
            ],
        },
        # Scenario 5: Pod not ready — liveness probe failing
        {
            "id": "liveness_probe_failing",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: api\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: api\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: api\n"
                        "    spec:\n"
                        "      containers:\n"
                        "      - name: api\n"
                        "        image: myapi:v3.1\n"
                        "        ports:\n"
                        "        - containerPort: 8080\n"
                        "        livenessProbe:\n"
                        "          httpGet:\n"
                        "            path: /healthz\n"
                        "            port: 3000\n"
                        "          initialDelaySeconds: 5\n"
                        "          periodSeconds: 10\n"
                        "        readinessProbe:\n"
                        "          httpGet:\n"
                        "            path: /ready\n"
                        "            port: 8080\n"
                        "          initialDelaySeconds: 5\n"
                        "          periodSeconds: 10\n"
                        "        resources:\n"
                        "          limits:\n"
                        '            memory: "512Mi"\n'
                        '            cpu: "500m"\n'
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "api-7f8d9c6b5-gh7j8 0/1 Running 3 (30s ago) 2m\n"
                    "\n"
                    "$ kubectl describe pod api-7f8d9c6b5-gh7j8\n"
                    "...\n"
                    "Events:\n"
                    " Warning Unhealthy 90s kubelet Liveness probe failed: "
                    'Get "http://10.244.0.5:3000/healthz": dial tcp 10.244.0.5:3000: '
                    "connect: connection refused\n"
                    " Normal Killing 90s kubelet Container api failed liveness probe, "
                    "will be restarted\n"
                    "...\n"
                    "\n"
                    "Note: The application listens on port 8080, not 3000."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    # The readiness probe already uses "port: 8080", so a bare
                    # port match would pass on the unfixed file. Anchoring on
                    # the lines up to "readinessProbe:" pins the match to the
                    # liveness probe; the indentation here must stay in sync
                    # with the manifest content above.
                    "expected": (
                        "port: 8080\n"
                        "          initialDelaySeconds: 5\n"
                        "          periodSeconds: 10\n"
                        "        readinessProbe:"
                    ),
                    "hint": (
                        "The liveness probe port (3000) doesn't match the "
                        "container port (8080). Change liveness probe port "
                        "to 8080."
                    ),
                }
            ],
        },
    ]