| """Task: Kubernetes Pod Failures — MEDIUM. |
| |
| Agent fixes common pod failure scenarios: |
| OOMKilled, ImagePullBackOff, wrong command, missing ConfigMap, liveness probe. |
| """ |
|
|
| from server.models import TaskDifficulty |
| from server.tasks.base import BaseTask |
|
|
|
|
class K8sPodTask(BaseTask):
    """Medium-difficulty task: repair Deployments whose pods fail at runtime.

    ``SCENARIOS`` lists five independent cases. Every entry is a dict with:

    * ``id`` -- short scenario identifier.
    * ``files`` -- fixture files (``path``, ``type``, ``content``) that make
      up the broken project.
    * ``error`` -- the ``phase`` plus the kubectl output describing the
      failure.
    * ``expected_fixes`` -- per-file checks, each with a ``hint``. All checks
      here use ``type: "contains"``; presumably the base task evaluates them
      as substring matches against the repaired file -- confirm in BaseTask.

    Embedded manifests use conventional two-space YAML nesting and the Python
    fixture uses four-space indentation, so the only fault in each fixture is
    the one its scenario is about. Whitespace inside these strings is
    significant: the liveness-probe check below depends on it.
    """

    NAME = "Kubernetes Pod Failures"
    DESCRIPTION = "Fix Kubernetes pod failures including CrashLoopBackOff, ImagePullBackOff, and resource issues"
    DIFFICULTY = TaskDifficulty.MEDIUM
    AVAILABLE_SECRETS = []

    SCENARIOS = [
        # Scenario 1: memory limit too low, container is OOMKilled.
        {
            "id": "oom_killed",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: api-server\n"
                        "spec:\n"
                        "  replicas: 3\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: api\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: api\n"
                        "    spec:\n"
                        "      containers:\n"
                        "        - name: api\n"
                        "          image: myapp:v1.2.3\n"
                        "          resources:\n"
                        "            limits:\n"
                        '              memory: "64Mi"\n'
                        '              cpu: "100m"\n'
                        "          ports:\n"
                        "            - containerPort: 8080\n"
                    ),
                }
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "api-server-7d4b8c9f5-x2k9m 0/1 CrashLoopBackOff 5 3m\n"
                    "\n"
                    "$ kubectl describe pod api-server-7d4b8c9f5-x2k9m\n"
                    "...\n"
                    "State: Waiting\n"
                    " Reason: CrashLoopBackOff\n"
                    "Last State: Terminated\n"
                    " Reason: OOMKilled\n"
                    " Exit Code: 137\n"
                    "...\n"
                    "Events:\n"
                    " Warning OOMKilling 3m kubelet Memory limit 64Mi exceeded"
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": 'memory: "256Mi"',
                    "hint": "Container is OOMKilled with 64Mi limit. The app needs at least 256Mi.",
                }
            ],
        },
        # Scenario 2: typo in the image tag, pull fails with ImagePullBackOff.
        {
            "id": "image_pull_backoff",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: web-app\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: web\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: web\n"
                        "    spec:\n"
                        "      containers:\n"
                        "        - name: web\n"
                        "          image: nginx:latset\n"
                        "          ports:\n"
                        "            - containerPort: 80\n"
                    ),
                }
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "web-app-5f8d7b6c4-abc12 0/1 ImagePullBackOff 0 2m\n"
                    "\n"
                    "$ kubectl describe pod web-app-5f8d7b6c4-abc12\n"
                    "...\n"
                    "Events:\n"
                    ' Warning Failed 2m kubelet Failed to pull image "nginx:latset": '
                    "rpc error: code = NotFound desc = failed to pull and unpack image: "
                    "reference not found\n"
                    " Warning Failed 2m kubelet Error: ImagePullBackOff\n"
                    "..."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": "image: nginx:latest",
                    "hint": "Image tag has a typo: 'latset' should be 'latest'",
                }
            ],
        },
        # Scenario 3: container command names a script that does not exist.
        {
            "id": "wrong_command",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: worker\n"
                        "spec:\n"
                        "  replicas: 1\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: worker\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: worker\n"
                        "    spec:\n"
                        "      containers:\n"
                        "        - name: worker\n"
                        "          image: python:3.11-slim\n"
                        '          command: ["python", "workers.py"]\n'
                        "          resources:\n"
                        "            limits:\n"
                        '              memory: "512Mi"\n'
                        '              cpu: "500m"\n'
                    ),
                },
                {
                    "path": "app/worker.py",
                    "type": "other",
                    # Four-space indentation keeps this fixture valid Python.
                    "content": (
                        "import time\n"
                        "\n"
                        "def main():\n"
                        "    while True:\n"
                        "        print('Processing...')\n"
                        "        time.sleep(5)\n"
                        "\n"
                        "if __name__ == '__main__':\n"
                        "    main()\n"
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "worker-6b8f9d7c4-kj3m2 0/1 CrashLoopBackOff 4 2m\n"
                    "\n"
                    "$ kubectl logs worker-6b8f9d7c4-kj3m2\n"
                    "python: can't open file '/workers.py': [Errno 2] No such file or directory\n"
                    "\n"
                    "$ kubectl describe pod worker-6b8f9d7c4-kj3m2\n"
                    "...\n"
                    "State: Waiting\n"
                    " Reason: CrashLoopBackOff\n"
                    "Last State: Terminated\n"
                    " Reason: Error\n"
                    " Exit Code: 2\n"
                    "..."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    "expected": 'command: ["python", "worker.py"]',
                    "hint": "The command references 'workers.py' but the file is named 'worker.py' (no 's')",
                }
            ],
        },
        # Scenario 4: Deployment references a ConfigMap that was never created.
        {
            "id": "missing_configmap",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: backend\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: backend\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: backend\n"
                        "    spec:\n"
                        "      containers:\n"
                        "        - name: backend\n"
                        "          image: mybackend:v2.0\n"
                        "          ports:\n"
                        "            - containerPort: 8080\n"
                        "          envFrom:\n"
                        "            - configMapRef:\n"
                        "                name: app-config\n"
                        "          resources:\n"
                        "            limits:\n"
                        '              memory: "512Mi"\n'
                        '              cpu: "500m"\n'
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "backend-5c9d8f7b6-lm4n5 0/1 CreateContainerConfigError 0 1m\n"
                    "\n"
                    "$ kubectl describe pod backend-5c9d8f7b6-lm4n5\n"
                    "...\n"
                    "Events:\n"
                    ' Warning Failed 1m kubelet Error: configmap "app-config" not found\n'
                    "..."
                ),
            },
            "expected_fixes": [
                {
                    # The fix lives in a new file, not in the Deployment.
                    "file": "k8s/configmap.yaml",
                    "type": "contains",
                    "expected": "name: app-config",
                    "hint": "The ConfigMap 'app-config' is referenced but doesn't exist. Create a ConfigMap manifest.",
                }
            ],
        },
        # Scenario 5: liveness probe targets a port the container never opens.
        {
            "id": "liveness_probe_failing",
            "files": [
                {
                    "path": "k8s/deployment.yaml",
                    "type": "kubernetes",
                    "content": (
                        "apiVersion: apps/v1\n"
                        "kind: Deployment\n"
                        "metadata:\n"
                        "  name: api\n"
                        "spec:\n"
                        "  replicas: 2\n"
                        "  selector:\n"
                        "    matchLabels:\n"
                        "      app: api\n"
                        "  template:\n"
                        "    metadata:\n"
                        "      labels:\n"
                        "        app: api\n"
                        "    spec:\n"
                        "      containers:\n"
                        "        - name: api\n"
                        "          image: myapi:v3.1\n"
                        "          ports:\n"
                        "            - containerPort: 8080\n"
                        "          livenessProbe:\n"
                        "            httpGet:\n"
                        "              path: /healthz\n"
                        "              port: 3000\n"
                        "            initialDelaySeconds: 5\n"
                        "            periodSeconds: 10\n"
                        "          readinessProbe:\n"
                        "            httpGet:\n"
                        "              path: /ready\n"
                        "              port: 8080\n"
                        "            initialDelaySeconds: 5\n"
                        "            periodSeconds: 10\n"
                        "          resources:\n"
                        "            limits:\n"
                        '              memory: "512Mi"\n'
                        '              cpu: "500m"\n'
                    ),
                },
            ],
            "error": {
                "phase": "k8s_runtime",
                "message": (
                    "$ kubectl get pods\n"
                    "NAME READY STATUS RESTARTS AGE\n"
                    "api-7f8d9c6b5-gh7j8 0/1 Running 3 (30s ago) 2m\n"
                    "\n"
                    "$ kubectl describe pod api-7f8d9c6b5-gh7j8\n"
                    "...\n"
                    "Events:\n"
                    " Warning Unhealthy 90s kubelet Liveness probe failed: "
                    'Get "http://10.244.0.5:3000/healthz": dial tcp 10.244.0.5:3000: '
                    "connect: connection refused\n"
                    " Normal Killing 90s kubelet Container api failed liveness probe, "
                    "will be restarted\n"
                    "...\n"
                    "\n"
                    "Note: The application listens on port 8080, not 3000."
                ),
            },
            "expected_fixes": [
                {
                    "file": "k8s/deployment.yaml",
                    "type": "contains",
                    # The readiness probe already uses port 8080, so a bare
                    # "port: 8080" would match the unfixed file. Anchoring on
                    # the lines that follow, up to "readinessProbe:", pins the
                    # liveness probe's port. Indentation must mirror the
                    # manifest above.
                    "expected": (
                        "port: 8080\n"
                        "            initialDelaySeconds: 5\n"
                        "            periodSeconds: 10\n"
                        "          readinessProbe:"
                    ),
                    "hint": "The liveness probe port (3000) doesn't match the container port (8080). Change liveness probe port to 8080.",
                }
            ],
        },
    ]
|
|