File size: 3,679 Bytes
c8f3b98
85b7ac8
 
c8f3b98
 
85b7ac8
 
 
 
 
 
 
c8f3b98
85b7ac8
 
c8f3b98
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
4b07aaf
85b7ac8
 
 
 
 
4b07aaf
85b7ac8
 
 
 
 
4b07aaf
85b7ac8
 
 
 
 
4b07aaf
85b7ac8
 
 
 
4b07aaf
 
85b7ac8
 
 
 
 
4b07aaf
85b7ac8
c8f3b98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8f3b98
 
 
 
 
 
 
 
 
 
 
 
85b7ac8
 
 
 
4b07aaf
85b7ac8
 
4b07aaf
85b7ac8
4b07aaf
c8f3b98
 
 
 
85b7ac8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
---
# Identifying metadata for this environment manifest.
name: cloud-native-devops-env
# Kept as a quoted string.
version: '1.0.0'
# Folded scalar: the wrapped lines below are joined with single spaces.
description: >
  Debug broken GitHub Actions workflows, Dockerfiles, and
  Kubernetes manifests. AI agents identify and fix cloud-native
  deployment pipeline issues.

author: Krishna
license: MIT
# Topic tags attached to this environment.
tags:
  - devops
  - docker
  - github-actions
  - kubernetes
  - debugging
  - infrastructure
  - cloud-native

# Environment interface settings: the environment type plus the declared
# kinds of its observation and action spaces.
environment:
  type: text
  observation_space: structured
  action_space: structured
  # NOTE(review): presumably the per-episode step cap — confirm with consumer.
  max_steps: 10

# Task catalogue. Every entry carries an id, a display name, a description,
# a difficulty label, and a scenario count. Each id is also used as a key
# under `graders` and `baseline.expected_scores`.
#
# Descriptions that would exceed the line-length limit are written as folded
# scalars (>-): wrapped lines are joined with single spaces and no trailing
# newline is kept, so the resulting value is a single line of text.
tasks:
  # Dockerfile tasks
  - id: dockerfile_syntax
    name: Dockerfile Syntax Errors
    description: Fix syntax and instruction errors in Dockerfiles
    difficulty: easy
    num_scenarios: 5

  - id: dockerfile_runtime
    name: Dockerfile Runtime Errors
    description: Fix runtime/container execution issues in Dockerfiles
    difficulty: medium
    num_scenarios: 5

  # GitHub Actions workflow tasks
  - id: workflow_syntax_structure
    name: Workflow Syntax and Structure
    description: Fix GitHub Actions YAML syntax and job structure issues
    difficulty: easy
    num_scenarios: 5

  - id: workflow_secrets_permissions
    name: Workflow Secrets and Permissions
    description: Fix secret wiring, env usage, and permissions in workflows
    difficulty: medium
    num_scenarios: 5

  # Combined CI / Docker tasks
  - id: ci_docker_integration
    name: CI and Docker Build Integration
    description: Debug combined workflow and Docker build integration failures
    difficulty: medium-hard
    num_scenarios: 5

  - id: multi_stage_pipeline_matrix
    name: Multi-Stage Pipeline and Matrix
    description: Debug complex multi-stage and matrix CI/CD pipelines
    difficulty: hard
    num_scenarios: 5

  # Kubernetes tasks
  - id: k8s_pod_failures
    name: Kubernetes Pod Failures
    description: >-
      Fix Kubernetes pod failures including CrashLoopBackOff,
      ImagePullBackOff, and resource issues
    difficulty: medium
    num_scenarios: 5

  - id: k8s_networking
    # Quoted because the name contains `&`.
    name: 'Kubernetes Service & Ingress Issues'
    description: >-
      Fix Kubernetes networking issues including Service selectors,
      port mismatches, and Ingress configuration
    difficulty: hard
    num_scenarios: 5

  # Multi-file pipeline tasks
  - id: pipeline_build_deploy
    # Quoted because the name contains `&`.
    name: 'CI/CD Build & Push Pipeline'
    description: >-
      Debug GHA-to-Docker-to-Registry pipeline failures across
      multiple files
    difficulty: hard
    num_scenarios: 5

  - id: pipeline_full_stack
    name: Full Stack Deployment Pipeline
    description: >-
      Debug complex multi-error deployment pipelines across GHA
      workflows, Dockerfiles, and Kubernetes manifests
    difficulty: expert
    num_scenarios: 5

# Grader configuration, keyed by task id (one entry for each id listed under
# `tasks`). Every grader here is declared with type `deterministic` and the
# same score_range.
# NOTE(review): score_range is presumably [min, max] inclusive — confirm
# against the consumer's schema.
graders:
  dockerfile_syntax:
    type: deterministic
    score_range: [0.0, 1.0]
  dockerfile_runtime:
    type: deterministic
    score_range: [0.0, 1.0]
  workflow_syntax_structure:
    type: deterministic
    score_range: [0.0, 1.0]
  workflow_secrets_permissions:
    type: deterministic
    score_range: [0.0, 1.0]
  ci_docker_integration:
    type: deterministic
    score_range: [0.0, 1.0]
  multi_stage_pipeline_matrix:
    type: deterministic
    score_range: [0.0, 1.0]
  k8s_pod_failures:
    type: deterministic
    score_range: [0.0, 1.0]
  k8s_networking:
    type: deterministic
    score_range: [0.0, 1.0]
  pipeline_build_deploy:
    type: deterministic
    score_range: [0.0, 1.0]
  pipeline_full_stack:
    type: deterministic
    score_range: [0.0, 1.0]

# Baseline reference: the script named below and the score expected from it
# for each task id. Keys under expected_scores match the ids under `tasks`.
baseline:
  # NOTE(review): presumably resolved relative to the repository root —
  # confirm with the consumer.
  script: inference.py
  # All values fall inside the [0.0, 1.0] score_range declared under `graders`.
  expected_scores:
    dockerfile_syntax: 0.70
    dockerfile_runtime: 0.55
    workflow_syntax_structure: 0.65
    workflow_secrets_permissions: 0.50
    ci_docker_integration: 0.45
    multi_stage_pipeline_matrix: 0.30
    k8s_pod_failures: 0.50
    k8s_networking: 0.40
    pipeline_build_deploy: 0.35
    pipeline_full_stack: 0.20

# Resource settings declared for this environment.
resources:
  vcpu: 2
  memory: 8gb
  # NOTE(review): unit is not stated here; presumably seconds — confirm.
  timeout: 1200