Krishna1107 committed on
Commit
804f70e
·
1 Parent(s): 88ffd96

deployment fixes, hf space fixes

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. Dockerfile +1 -1
  3. inference.py +20 -12
  4. server/main.py +12 -3
  5. server/static/index.html +863 -0
  6. tests/test_endpoints.py +8 -2
.gitignore CHANGED
@@ -42,3 +42,4 @@ Thumbs.db
42
  *.zip
43
 
44
  context/
 
 
42
  *.zip
43
 
44
  context/
45
+ tutorial_references/
Dockerfile CHANGED
@@ -4,7 +4,7 @@ WORKDIR /app
4
 
5
  # Install dependencies first (layer caching)
6
  COPY requirements.txt .
7
- RUN pip install --no-cache-dir -r requirements.txt
8
 
9
  # Copy application code
10
  COPY server/ ./server/
 
4
 
5
  # Install dependencies first (layer caching)
6
  COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir aiofiles
8
 
9
  # Copy application code
10
  COPY server/ ./server/
inference.py CHANGED
@@ -24,10 +24,11 @@ from openai import OpenAI
24
 
25
  # ── Configuration ─────────────────────────────────────────────────
26
 
27
- API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
28
- MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-70B-Instruct")
29
- HF_TOKEN = os.environ.get("HF_TOKEN", "")
30
- ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")
 
31
  MAX_STEPS = 8 # leave 2 steps buffer before env hard-limit of 10
32
 
33
  SYSTEM_PROMPT = """You are an expert DevOps engineer debugging CI/CD pipelines.
@@ -70,7 +71,7 @@ def create_client() -> OpenAI:
70
  """Create OpenAI-compatible client for HuggingFace router."""
71
  return OpenAI(
72
  base_url=API_BASE_URL,
73
- api_key=HF_TOKEN or "dummy",
74
  )
75
 
76
 
@@ -182,10 +183,12 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
182
  actual_task_id = info.get("task_id", task_id or "unknown")
183
  actual_scenario_id = info.get("scenario_id", scenario_id or "unknown")
184
 
185
- print(f" Episode: task={actual_task_id}, scenario={actual_scenario_id}")
 
186
 
187
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
188
  trajectory = []
 
189
 
190
  for step_num in range(MAX_STEPS):
191
  user_msg = format_observation(obs)
@@ -200,7 +203,7 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
200
  )
201
  llm_text = completion.choices[0].message.content or '{"action": "submit"}'
202
  except Exception as e:
203
- print(f" LLM error at step {step_num + 1}: {e}")
204
  llm_text = '{"action": "submit"}'
205
 
206
  messages.append({"role": "assistant", "content": llm_text})
@@ -208,18 +211,21 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
208
  parsed = parse_llm_response(llm_text)
209
  action = build_action(parsed)
210
 
211
- print(f" Step {step_num + 1}: {action['action_type']}", end="")
212
-
213
  step_resp = env_request("POST", "/step", {"action": action})
214
  obs = step_resp["observation"]
215
  reward = step_resp.get("reward", 0.0)
216
  done = step_resp.get("done", False)
217
  step_info = step_resp.get("info", {})
 
 
 
 
218
 
219
- print(f" -> reward={reward:.2f}, fixed={step_info.get('issues_fixed', '?')}/{step_info.get('issues_total', '?')}")
 
220
 
221
  trajectory.append({
222
- "step": step_num + 1,
223
  "action": action,
224
  "reward": reward,
225
  "done": done,
@@ -236,7 +242,9 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
236
  })
237
  result = grade_resp.get("result", {})
238
  score = result.get("score", 0.0)
239
- print(f" Score: {score:.3f} | {result.get('feedback', '')}")
 
 
240
  return result
241
 
242
 
 
24
 
25
  # ── Configuration ─────────────────────────────────────────────────
26
 
27
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
28
+ MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-70B-Instruct")
29
+ HF_TOKEN = os.getenv("HF_TOKEN")
30
+ ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
31
+ LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
32
  MAX_STEPS = 8 # leave 2 steps buffer before env hard-limit of 10
33
 
34
  SYSTEM_PROMPT = """You are an expert DevOps engineer debugging CI/CD pipelines.
 
71
  """Create OpenAI-compatible client for HuggingFace router."""
72
  return OpenAI(
73
  base_url=API_BASE_URL,
74
+ api_key=HF_TOKEN,
75
  )
76
 
77
 
 
183
  actual_task_id = info.get("task_id", task_id or "unknown")
184
  actual_scenario_id = info.get("scenario_id", scenario_id or "unknown")
185
 
186
+ # ── [START] structured log ──
187
+ print(f"[START] task_id={actual_task_id} scenario_id={actual_scenario_id}")
188
 
189
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
190
  trajectory = []
191
+ total_steps = 0
192
 
193
  for step_num in range(MAX_STEPS):
194
  user_msg = format_observation(obs)
 
203
  )
204
  llm_text = completion.choices[0].message.content or '{"action": "submit"}'
205
  except Exception as e:
206
+ print(f"[STEP] step={step_num + 1} action=error reward=0.00 done=false issues_fixed=0 issues_total=0 error={e}")
207
  llm_text = '{"action": "submit"}'
208
 
209
  messages.append({"role": "assistant", "content": llm_text})
 
211
  parsed = parse_llm_response(llm_text)
212
  action = build_action(parsed)
213
 
 
 
214
  step_resp = env_request("POST", "/step", {"action": action})
215
  obs = step_resp["observation"]
216
  reward = step_resp.get("reward", 0.0)
217
  done = step_resp.get("done", False)
218
  step_info = step_resp.get("info", {})
219
+ total_steps = step_num + 1
220
+
221
+ issues_fixed = step_info.get("issues_fixed", 0)
222
+ issues_total = step_info.get("issues_total", 0)
223
 
224
+ # ── [STEP] structured log ──
225
+ print(f"[STEP] step={total_steps} action={action['action_type']} reward={reward:.2f} done={str(done).lower()} issues_fixed={issues_fixed} issues_total={issues_total}")
226
 
227
  trajectory.append({
228
+ "step": total_steps,
229
  "action": action,
230
  "reward": reward,
231
  "done": done,
 
242
  })
243
  result = grade_resp.get("result", {})
244
  score = result.get("score", 0.0)
245
+
246
+ # ── [END] structured log ──
247
+ print(f"[END] task_id={actual_task_id} scenario_id={actual_scenario_id} score={score:.3f} steps={total_steps}")
248
  return result
249
 
250
 
server/main.py CHANGED
@@ -2,11 +2,14 @@
2
 
3
  from __future__ import annotations
4
 
 
5
  from typing import Optional
6
 
7
  import uvicorn
8
  from fastapi import FastAPI, HTTPException
9
  from fastapi.middleware.cors import CORSMiddleware
 
 
10
 
11
  from server.environment import CICDDebugEnvironment
12
  from server.graders import run_grader
@@ -27,8 +30,10 @@ from server.models import (
27
  )
28
  from server.tasks.task_registry import TASK_REGISTRY
29
 
 
 
30
  app = FastAPI(
31
- title="CI/CD Debug Environment",
32
  description="OpenEnv-style environment for Docker + GitHub Actions debugging",
33
  version="1.0.0",
34
  )
@@ -41,12 +46,16 @@ app.add_middleware(
41
  allow_headers=["*"],
42
  )
43
 
 
 
 
44
  env: Optional[CICDDebugEnvironment] = None
45
 
46
 
47
- @app.get("/")
48
  async def root():
49
- return {"status": "healthy", "environment": "cicd-docker-env"}
 
50
 
51
 
52
  @app.get("/health")
 
2
 
3
  from __future__ import annotations
4
 
5
+ from pathlib import Path
6
  from typing import Optional
7
 
8
  import uvicorn
9
  from fastapi import FastAPI, HTTPException
10
  from fastapi.middleware.cors import CORSMiddleware
11
+ from fastapi.responses import HTMLResponse
12
+ from fastapi.staticfiles import StaticFiles
13
 
14
  from server.environment import CICDDebugEnvironment
15
  from server.graders import run_grader
 
30
  )
31
  from server.tasks.task_registry import TASK_REGISTRY
32
 
33
+ STATIC_DIR = Path(__file__).resolve().parent / "static"
34
+
35
  app = FastAPI(
36
+ title="CI/CD + Docker Debug Environment",
37
  description="OpenEnv-style environment for Docker + GitHub Actions debugging",
38
  version="1.0.0",
39
  )
 
46
  allow_headers=["*"],
47
  )
48
 
49
+ # Serve static assets (CSS, JS, images if needed later)
50
+ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
51
+
52
  env: Optional[CICDDebugEnvironment] = None
53
 
54
 
55
+ @app.get("/", response_class=HTMLResponse)
56
  async def root():
57
+ html_path = STATIC_DIR / "index.html"
58
+ return HTMLResponse(content=html_path.read_text(encoding="utf-8"), status_code=200)
59
 
60
 
61
  @app.get("/health")
server/static/index.html ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>CI/CD + Docker Debug Environment</title>
7
+ <meta name="description" content="OpenEnv environment where AI agents learn to debug broken GitHub Actions workflows and Dockerfiles.">
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
10
+ <style>
11
+ *, *::before, *::after { margin: 0; padding: 0; box-sizing: border-box; }
12
+
13
+ :root {
14
+ --bg-primary: #0a0e1a;
15
+ --bg-secondary: #111827;
16
+ --bg-card: rgba(17, 24, 39, 0.6);
17
+ --border-card: rgba(99, 102, 241, 0.15);
18
+ --text-primary: #f1f5f9;
19
+ --text-secondary: #94a3b8;
20
+ --text-muted: #64748b;
21
+ --accent-indigo: #818cf8;
22
+ --accent-blue: #60a5fa;
23
+ --accent-cyan: #22d3ee;
24
+ --accent-emerald: #34d399;
25
+ --accent-amber: #fbbf24;
26
+ --accent-rose: #fb7185;
27
+ --accent-purple: #a78bfa;
28
+ --gradient-primary: linear-gradient(135deg, #818cf8, #60a5fa, #22d3ee);
29
+ --gradient-warm: linear-gradient(135deg, #f97316, #fb7185, #a78bfa);
30
+ }
31
+
32
+ html { scroll-behavior: smooth; }
33
+
34
+ body {
35
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
36
+ background: var(--bg-primary);
37
+ color: var(--text-primary);
38
+ line-height: 1.7;
39
+ overflow-x: hidden;
40
+ }
41
+
42
+ /* ── Animated Background ── */
43
+ .bg-grid {
44
+ position: fixed;
45
+ inset: 0;
46
+ z-index: 0;
47
+ background-image:
48
+ radial-gradient(circle at 20% 30%, rgba(99,102,241,0.08) 0%, transparent 50%),
49
+ radial-gradient(circle at 80% 70%, rgba(34,211,238,0.06) 0%, transparent 50%),
50
+ radial-gradient(circle at 50% 50%, rgba(168,85,247,0.04) 0%, transparent 60%);
51
+ animation: bgPulse 12s ease-in-out infinite alternate;
52
+ }
53
+ .bg-grid::after {
54
+ content: '';
55
+ position: absolute;
56
+ inset: 0;
57
+ background-image: linear-gradient(rgba(99,102,241,0.03) 1px, transparent 1px),
58
+ linear-gradient(90deg, rgba(99,102,241,0.03) 1px, transparent 1px);
59
+ background-size: 60px 60px;
60
+ }
61
+ @keyframes bgPulse {
62
+ 0% { opacity: 1; }
63
+ 100% { opacity: 0.6; transform: scale(1.02); }
64
+ }
65
+
66
+ /* ── Main Container ── */
67
+ .container {
68
+ position: relative;
69
+ z-index: 1;
70
+ max-width: 1100px;
71
+ margin: 0 auto;
72
+ padding: 0 24px;
73
+ }
74
+
75
+ /* ── Hero Section ── */
76
+ .hero {
77
+ text-align: center;
78
+ padding: 80px 0 60px;
79
+ }
80
+ .hero-badge {
81
+ display: inline-flex;
82
+ align-items: center;
83
+ gap: 8px;
84
+ padding: 6px 16px;
85
+ border-radius: 100px;
86
+ background: rgba(99,102,241,0.1);
87
+ border: 1px solid rgba(99,102,241,0.25);
88
+ font-size: 0.8rem;
89
+ font-weight: 500;
90
+ color: var(--accent-indigo);
91
+ letter-spacing: 0.5px;
92
+ margin-bottom: 28px;
93
+ animation: fadeInDown 0.6s ease-out;
94
+ }
95
+ .hero-badge .dot {
96
+ width: 7px; height: 7px;
97
+ border-radius: 50%;
98
+ background: var(--accent-emerald);
99
+ animation: pulse 2s ease-in-out infinite;
100
+ }
101
+ @keyframes pulse {
102
+ 0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(52,211,153,0.5); }
103
+ 50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(52,211,153,0); }
104
+ }
105
+ @keyframes fadeInDown {
106
+ from { opacity: 0; transform: translateY(-16px); }
107
+ to { opacity: 1; transform: translateY(0); }
108
+ }
109
+ @keyframes fadeInUp {
110
+ from { opacity: 0; transform: translateY(20px); }
111
+ to { opacity: 1; transform: translateY(0); }
112
+ }
113
+
114
+ .hero h1 {
115
+ font-size: clamp(2.2rem, 5vw, 3.4rem);
116
+ font-weight: 800;
117
+ line-height: 1.15;
118
+ letter-spacing: -1.5px;
119
+ margin-bottom: 20px;
120
+ animation: fadeInUp 0.7s ease-out 0.1s both;
121
+ }
122
+ .hero h1 .gradient-text {
123
+ background: var(--gradient-primary);
124
+ -webkit-background-clip: text;
125
+ -webkit-text-fill-color: transparent;
126
+ background-clip: text;
127
+ }
128
+ .hero p {
129
+ font-size: 1.15rem;
130
+ color: var(--text-secondary);
131
+ max-width: 650px;
132
+ margin: 0 auto 36px;
133
+ animation: fadeInUp 0.7s ease-out 0.2s both;
134
+ }
135
+
136
+ .hero-actions {
137
+ display: flex;
138
+ gap: 14px;
139
+ justify-content: center;
140
+ flex-wrap: wrap;
141
+ animation: fadeInUp 0.7s ease-out 0.3s both;
142
+ }
143
+ .btn {
144
+ display: inline-flex;
145
+ align-items: center;
146
+ gap: 8px;
147
+ padding: 12px 26px;
148
+ border-radius: 12px;
149
+ font-family: inherit;
150
+ font-size: 0.9rem;
151
+ font-weight: 600;
152
+ text-decoration: none;
153
+ cursor: pointer;
154
+ border: none;
155
+ transition: all 0.25s ease;
156
+ }
157
+ .btn-primary {
158
+ background: var(--gradient-primary);
159
+ color: #0a0e1a;
160
+ }
161
+ .btn-primary:hover { transform: translateY(-2px); box-shadow: 0 8px 30px rgba(99,102,241,0.3); }
162
+ .btn-secondary {
163
+ background: rgba(99,102,241,0.1);
164
+ border: 1px solid rgba(99,102,241,0.25);
165
+ color: var(--accent-indigo);
166
+ }
167
+ .btn-secondary:hover { background: rgba(99,102,241,0.18); transform: translateY(-2px); }
168
+
169
+ /* ── Stats Strip ── */
170
+ .stats {
171
+ display: grid;
172
+ grid-template-columns: repeat(4, 1fr);
173
+ gap: 16px;
174
+ margin: 0 0 64px;
175
+ animation: fadeInUp 0.7s ease-out 0.4s both;
176
+ }
177
+ .stat-card {
178
+ text-align: center;
179
+ padding: 24px 16px;
180
+ border-radius: 16px;
181
+ background: var(--bg-card);
182
+ border: 1px solid var(--border-card);
183
+ backdrop-filter: blur(12px);
184
+ transition: border-color 0.3s;
185
+ }
186
+ .stat-card:hover { border-color: rgba(99,102,241,0.35); }
187
+ .stat-number {
188
+ font-size: 2rem;
189
+ font-weight: 800;
190
+ letter-spacing: -1px;
191
+ background: var(--gradient-primary);
192
+ -webkit-background-clip: text;
193
+ -webkit-text-fill-color: transparent;
194
+ background-clip: text;
195
+ }
196
+ .stat-label {
197
+ font-size: 0.82rem;
198
+ color: var(--text-muted);
199
+ margin-top: 4px;
200
+ font-weight: 500;
201
+ }
202
+
203
+ /* ── Section Headers ── */
204
+ .section { margin-bottom: 64px; }
205
+ .section-header {
206
+ margin-bottom: 28px;
207
+ }
208
+ .section-header h2 {
209
+ font-size: 1.6rem;
210
+ font-weight: 700;
211
+ letter-spacing: -0.5px;
212
+ margin-bottom: 8px;
213
+ }
214
+ .section-header p {
215
+ color: var(--text-secondary);
216
+ font-size: 0.95rem;
217
+ }
218
+
219
+ /* ── Task Cards ── */
220
+ .task-grid {
221
+ display: grid;
222
+ grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
223
+ gap: 18px;
224
+ }
225
+ .task-card {
226
+ padding: 24px;
227
+ border-radius: 16px;
228
+ background: var(--bg-card);
229
+ border: 1px solid var(--border-card);
230
+ backdrop-filter: blur(12px);
231
+ transition: all 0.3s ease;
232
+ position: relative;
233
+ overflow: hidden;
234
+ }
235
+ .task-card::before {
236
+ content: '';
237
+ position: absolute;
238
+ top: 0; left: 0; right: 0;
239
+ height: 3px;
240
+ border-radius: 16px 16px 0 0;
241
+ }
242
+ .task-card:hover {
243
+ border-color: rgba(99,102,241,0.35);
244
+ transform: translateY(-4px);
245
+ box-shadow: 0 12px 40px rgba(0,0,0,0.3);
246
+ }
247
+ .task-card.easy::before { background: linear-gradient(90deg, #34d399, #22d3ee); }
248
+ .task-card.medium::before { background: linear-gradient(90deg, #fbbf24, #f97316); }
249
+ .task-card.hard::before { background: linear-gradient(90deg, #fb7185, #a78bfa); }
250
+
251
+ .task-header {
252
+ display: flex;
253
+ align-items: flex-start;
254
+ justify-content: space-between;
255
+ margin-bottom: 12px;
256
+ }
257
+ .task-id {
258
+ font-family: 'JetBrains Mono', monospace;
259
+ font-size: 0.78rem;
260
+ color: var(--accent-indigo);
261
+ background: rgba(99,102,241,0.1);
262
+ padding: 3px 10px;
263
+ border-radius: 6px;
264
+ }
265
+ .difficulty-badge {
266
+ font-size: 0.72rem;
267
+ font-weight: 600;
268
+ padding: 3px 10px;
269
+ border-radius: 100px;
270
+ text-transform: uppercase;
271
+ letter-spacing: 0.5px;
272
+ }
273
+ .difficulty-badge.easy { background: rgba(52,211,153,0.12); color: #34d399; }
274
+ .difficulty-badge.medium { background: rgba(251,191,36,0.12); color: #fbbf24; }
275
+ .difficulty-badge.medium-hard { background: rgba(249,115,22,0.12); color: #f97316; }
276
+ .difficulty-badge.hard { background: rgba(251,113,133,0.12); color: #fb7185; }
277
+
278
+ .task-card h3 {
279
+ font-size: 1.05rem;
280
+ font-weight: 600;
281
+ margin-bottom: 8px;
282
+ }
283
+ .task-card p {
284
+ font-size: 0.88rem;
285
+ color: var(--text-secondary);
286
+ line-height: 1.6;
287
+ }
288
+ .task-scenarios {
289
+ margin-top: 14px;
290
+ font-size: 0.78rem;
291
+ color: var(--text-muted);
292
+ font-weight: 500;
293
+ }
294
+
295
+ /* ── Endpoint Table ── */
296
+ .endpoint-table {
297
+ width: 100%;
298
+ border-collapse: collapse;
299
+ border-radius: 16px;
300
+ overflow: hidden;
301
+ background: var(--bg-card);
302
+ border: 1px solid var(--border-card);
303
+ backdrop-filter: blur(12px);
304
+ }
305
+ .endpoint-table th {
306
+ text-align: left;
307
+ padding: 14px 20px;
308
+ font-size: 0.78rem;
309
+ font-weight: 600;
310
+ color: var(--text-muted);
311
+ text-transform: uppercase;
312
+ letter-spacing: 0.8px;
313
+ border-bottom: 1px solid var(--border-card);
314
+ background: rgba(99,102,241,0.04);
315
+ }
316
+ .endpoint-table td {
317
+ padding: 13px 20px;
318
+ font-size: 0.88rem;
319
+ border-bottom: 1px solid rgba(99,102,241,0.06);
320
+ color: var(--text-secondary);
321
+ }
322
+ .endpoint-table tr:last-child td { border-bottom: none; }
323
+ .endpoint-table tr:hover td { background: rgba(99,102,241,0.03); }
324
+ .endpoint-path {
325
+ font-family: 'JetBrains Mono', monospace;
326
+ font-size: 0.84rem;
327
+ color: var(--accent-cyan);
328
+ }
329
+ .method-badge {
330
+ font-family: 'JetBrains Mono', monospace;
331
+ font-size: 0.72rem;
332
+ font-weight: 600;
333
+ padding: 3px 8px;
334
+ border-radius: 5px;
335
+ text-transform: uppercase;
336
+ }
337
+ .method-badge.get { background: rgba(52,211,153,0.12); color: #34d399; }
338
+ .method-badge.post { background: rgba(96,165,250,0.12); color: #60a5fa; }
339
+
340
+ /* ── How It Works Flow ── */
341
+ .flow-steps {
342
+ display: grid;
343
+ grid-template-columns: repeat(3, 1fr);
344
+ gap: 18px;
345
+ }
346
+ .flow-step {
347
+ padding: 28px 24px;
348
+ border-radius: 16px;
349
+ background: var(--bg-card);
350
+ border: 1px solid var(--border-card);
351
+ backdrop-filter: blur(12px);
352
+ text-align: center;
353
+ position: relative;
354
+ transition: all 0.3s ease;
355
+ }
356
+ .flow-step:hover { border-color: rgba(99,102,241,0.3); transform: translateY(-3px); }
357
+ .flow-icon {
358
+ font-size: 2rem;
359
+ margin-bottom: 14px;
360
+ display: block;
361
+ }
362
+ .flow-step h3 {
363
+ font-size: 1rem;
364
+ font-weight: 700;
365
+ margin-bottom: 10px;
366
+ }
367
+ .flow-step p {
368
+ font-size: 0.85rem;
369
+ color: var(--text-secondary);
370
+ line-height: 1.6;
371
+ }
372
+ .flow-arrow {
373
+ display: none;
374
+ }
375
+
376
+ /* ── Code Block ── */
377
+ .code-block {
378
+ background: rgba(0,0,0,0.4);
379
+ border: 1px solid var(--border-card);
380
+ border-radius: 14px;
381
+ padding: 22px 26px;
382
+ font-family: 'JetBrains Mono', monospace;
383
+ font-size: 0.82rem;
384
+ line-height: 1.8;
385
+ color: var(--text-secondary);
386
+ overflow-x: auto;
387
+ position: relative;
388
+ }
389
+ .code-block .comment { color: var(--text-muted); }
390
+ .code-block .cmd { color: var(--accent-cyan); }
391
+ .code-block .flag { color: var(--accent-amber); }
392
+ .code-block .url { color: var(--accent-indigo); }
393
+
394
+ /* ── Grading Section ── */
395
+ .grading-grid {
396
+ display: grid;
397
+ grid-template-columns: repeat(auto-fill, minmax(230px, 1fr));
398
+ gap: 16px;
399
+ }
400
+ .grade-card {
401
+ padding: 22px;
402
+ border-radius: 14px;
403
+ background: var(--bg-card);
404
+ border: 1px solid var(--border-card);
405
+ text-align: center;
406
+ transition: all 0.3s ease;
407
+ }
408
+ .grade-card:hover { border-color: rgba(99,102,241,0.3); }
409
+ .grade-weight {
410
+ font-size: 1.8rem;
411
+ font-weight: 800;
412
+ margin-bottom: 6px;
413
+ }
414
+ .grade-card:nth-child(1) .grade-weight { color: var(--accent-emerald); }
415
+ .grade-card:nth-child(2) .grade-weight { color: var(--accent-blue); }
416
+ .grade-card:nth-child(3) .grade-weight { color: var(--accent-amber); }
417
+ .grade-card:nth-child(4) .grade-weight { color: var(--accent-rose); }
418
+ .grade-card h4 { font-size: 0.9rem; margin-bottom: 6px; }
419
+ .grade-card p { font-size: 0.8rem; color: var(--text-muted); }
420
+
421
+ /* ── Baseline Scores ── */
422
+ .baseline-bar {
423
+ display: flex;
424
+ align-items: center;
425
+ gap: 14px;
426
+ padding: 14px 20px;
427
+ border-radius: 12px;
428
+ background: var(--bg-card);
429
+ border: 1px solid var(--border-card);
430
+ margin-bottom: 10px;
431
+ transition: all 0.3s;
432
+ }
433
+ .baseline-bar:hover { border-color: rgba(99,102,241,0.3); }
434
+ .baseline-bar .task-name {
435
+ flex: 0 0 260px;
436
+ font-size: 0.85rem;
437
+ font-family: 'JetBrains Mono', monospace;
438
+ color: var(--text-secondary);
439
+ }
440
+ .baseline-bar .bar-track {
441
+ flex: 1;
442
+ height: 8px;
443
+ border-radius: 8px;
444
+ background: rgba(99,102,241,0.08);
445
+ overflow: hidden;
446
+ }
447
+ .baseline-bar .bar-fill {
448
+ height: 100%;
449
+ border-radius: 8px;
450
+ background: var(--gradient-primary);
451
+ transition: width 1.5s ease-out;
452
+ }
453
+ .baseline-bar .score-value {
454
+ flex: 0 0 60px;
455
+ text-align: right;
456
+ font-weight: 700;
457
+ font-size: 0.9rem;
458
+ font-family: 'JetBrains Mono', monospace;
459
+ }
460
+
461
+ /* ── Footer ── */
462
+ .footer {
463
+ text-align: center;
464
+ padding: 48px 0 40px;
465
+ border-top: 1px solid var(--border-card);
466
+ margin-top: 40px;
467
+ }
468
+ .footer p {
469
+ font-size: 0.82rem;
470
+ color: var(--text-muted);
471
+ }
472
+ .footer a {
473
+ color: var(--accent-indigo);
474
+ text-decoration: none;
475
+ }
476
+ .footer a:hover { text-decoration: underline; }
477
+
478
+ /* ── Responsive ── */
479
+ @media (max-width: 768px) {
480
+ .stats { grid-template-columns: repeat(2, 1fr); }
481
+ .flow-steps { grid-template-columns: 1fr; }
482
+ .task-grid { grid-template-columns: 1fr; }
483
+ .grading-grid { grid-template-columns: repeat(2, 1fr); }
484
+ .baseline-bar .task-name { flex: 0 0 160px; font-size: 0.75rem; }
485
+ .endpoint-table { font-size: 0.8rem; }
486
+ }
487
+ @media (max-width: 480px) {
488
+ .stats { grid-template-columns: 1fr 1fr; gap: 10px; }
489
+ .grading-grid { grid-template-columns: 1fr; }
490
+ .hero { padding: 50px 0 40px; }
491
+ }
492
+ </style>
493
+ </head>
494
+ <body>
495
+
496
+ <div class="bg-grid"></div>
497
+
498
+ <div class="container">
499
+
500
+ <!-- ═══ HERO ═══ -->
501
+ <section class="hero">
502
+ <div class="hero-badge">
503
+ <span class="dot"></span>
504
+ OpenEnv Environment &middot; Live
505
+ </div>
506
+ <h1>
507
+ <span class="gradient-text">CI/CD + Docker</span><br>
508
+ Debug Environment
509
+ </h1>
510
+ <p>
511
+ An OpenEnv-compatible environment where AI agents learn to debug broken
512
+ GitHub Actions workflows and Dockerfiles. Built for the OpenEnv Hackathon
513
+ by Scaler School of Technology.
514
+ </p>
515
+ <div class="hero-actions">
516
+ <a href="/info" class="btn btn-primary">
517
+ <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path d="M13 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/><polyline points="13 2 13 9 20 9"/></svg>
518
+ Explore API
519
+ </a>
520
+ <a href="https://github.com/melohub-xbit/GitHubActions-Docker-OpenEnv" target="_blank" class="btn btn-secondary">
521
+ <svg width="16" height="16" fill="currentColor" viewBox="0 0 24 24"><path d="M12 0C5.37 0 0 5.37 0 12c0 5.3 3.44 9.8 8.2 11.39.6.11.82-.26.82-.58v-2.17c-3.34.73-4.04-1.61-4.04-1.61-.55-1.39-1.34-1.76-1.34-1.76-1.09-.75.08-.73.08-.73 1.2.08 1.84 1.24 1.84 1.24 1.07 1.84 2.81 1.31 3.5 1 .1-.78.42-1.31.76-1.61-2.67-.3-5.47-1.33-5.47-5.93 0-1.31.47-2.38 1.24-3.22-.13-.3-.54-1.52.12-3.18 0 0 1-.32 3.3 1.23a11.5 11.5 0 0 1 6.02 0c2.28-1.55 3.28-1.23 3.28-1.23.66 1.66.25 2.88.12 3.18.77.84 1.24 1.91 1.24 3.22 0 4.61-2.81 5.63-5.48 5.92.43.37.81 1.1.81 2.22v3.29c0 .32.22.7.82.58C20.56 21.8 24 17.3 24 12c0-6.63-5.37-12-12-12z"/></svg>
522
+ GitHub
523
+ </a>
524
+ <a href="/docs" class="btn btn-secondary">
525
+ <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="16" y1="13" x2="8" y2="13"/><line x1="16" y1="17" x2="8" y2="17"/></svg>
526
+ API Docs
527
+ </a>
528
+ </div>
529
+ </section>
530
+
531
+ <!-- ═══ STATS ═══ -->
532
+ <div class="stats">
533
+ <div class="stat-card">
534
+ <div class="stat-number">6</div>
535
+ <div class="stat-label">Task Categories</div>
536
+ </div>
537
+ <div class="stat-card">
538
+ <div class="stat-number">30</div>
539
+ <div class="stat-label">Unique Scenarios</div>
540
+ </div>
541
+ <div class="stat-card">
542
+ <div class="stat-number">12</div>
543
+ <div class="stat-label">API Endpoints</div>
544
+ </div>
545
+ <div class="stat-card">
546
+ <div class="stat-number">0.547</div>
547
+ <div class="stat-label">Baseline Score (Llama 70B)</div>
548
+ </div>
549
+ </div>
550
+
551
+ <!-- ═══ HOW IT WORKS ═══ -->
552
+ <section class="section">
553
+ <div class="section-header">
554
+ <h2>How It Works</h2>
555
+ <p>Three-phase loop: receive broken configs, fix them, get graded.</p>
556
+ </div>
557
+ <div class="flow-steps">
558
+ <div class="flow-step">
559
+ <span class="flow-icon">📥</span>
560
+ <h3>1. Reset</h3>
561
+ <p>Agent receives broken Dockerfile or GitHub Actions YAML, error messages, and available secrets.</p>
562
+ </div>
563
+ <div class="flow-step">
564
+ <span class="flow-icon">🔧</span>
565
+ <h3>2. Observe → Act</h3>
566
+ <p>Read errors, analyze files, edit content, replace lines, or request hints. Up to 10 steps per episode.</p>
567
+ </div>
568
+ <div class="flow-step">
569
+ <span class="flow-icon">📊</span>
570
+ <h3>3. Grade</h3>
571
+ <p>Deterministic scoring based on issues fixed, efficiency, and hint usage. Score range: 0.0 → 1.0.</p>
572
+ </div>
573
+ </div>
574
+ </section>
575
+
576
+ <!-- ═══ TASKS ═══ -->
577
+ <section class="section">
578
+ <div class="section-header">
579
+ <h2>The 6 Tasks</h2>
580
+ <p>30 scenarios across 6 categories with clear difficulty progression.</p>
581
+ </div>
582
+ <div class="task-grid">
583
+ <div class="task-card easy">
584
+ <div class="task-header">
585
+ <span class="task-id">dockerfile_syntax</span>
586
+ <span class="difficulty-badge easy">Easy</span>
587
+ </div>
588
+ <h3>Dockerfile Syntax Errors</h3>
589
+ <p>Simple typos and instruction errors that break <code>docker build</code> — misspelled filenames, invalid base images, broken line continuations.</p>
590
+ <div class="task-scenarios">5 scenarios</div>
591
+ </div>
592
+ <div class="task-card medium">
593
+ <div class="task-header">
594
+ <span class="task-id">dockerfile_runtime</span>
595
+ <span class="difficulty-badge medium">Medium</span>
596
+ </div>
597
+ <h3>Dockerfile Runtime Errors</h3>
598
+ <p>Dockerfile builds but the container crashes at runtime — missing WORKDIR, CMD/ENTRYPOINT conflicts, permission issues.</p>
599
+ <div class="task-scenarios">5 scenarios</div>
600
+ </div>
601
+ <div class="task-card easy">
602
+ <div class="task-header">
603
+ <span class="task-id">workflow_syntax_structure</span>
604
+ <span class="difficulty-badge easy">Easy</span>
605
+ </div>
606
+ <h3>Workflow Syntax &amp; Structure</h3>
607
+ <p>GitHub Actions YAML with structural problems — missing <code>runs-on</code>, invalid triggers, steps without actions.</p>
608
+ <div class="task-scenarios">5 scenarios</div>
609
+ </div>
610
+ <div class="task-card medium">
611
+ <div class="task-header">
612
+ <span class="task-id">workflow_secrets_permissions</span>
613
+ <span class="difficulty-badge medium">Medium</span>
614
+ </div>
615
+ <h3>Secrets &amp; Permissions</h3>
616
+ <p>Secrets exist but aren't wired correctly — missing <code>env:</code> blocks, wrong syntax, missing token permissions.</p>
617
+ <div class="task-scenarios">5 scenarios</div>
618
+ </div>
619
+ <div class="task-card medium">
620
+ <div class="task-header">
621
+ <span class="task-id">ci_docker_integration</span>
622
+ <span class="difficulty-badge medium-hard">Medium-Hard</span>
623
+ </div>
624
+ <h3>CI + Docker Integration</h3>
625
+ <p>Workflow and Dockerfile interact — fixing one alone isn't enough. Build context mismatches, missing login steps.</p>
626
+ <div class="task-scenarios">5 scenarios</div>
627
+ </div>
628
+ <div class="task-card hard">
629
+ <div class="task-header">
630
+ <span class="task-id">multi_stage_pipeline_matrix</span>
631
+ <span class="difficulty-badge hard">Hard</span>
632
+ </div>
633
+ <h3>Multi-Stage &amp; Matrix Pipelines</h3>
634
+ <p>Complex pipelines with 2-3 interacting bugs across multiple files. Artifact mismatches, matrix failures, cross-job dependencies.</p>
635
+ <div class="task-scenarios">5 scenarios</div>
636
+ </div>
637
+ </div>
638
+ </section>
639
+
640
+ <!-- ═══ GRADING ═══ -->
641
+ <section class="section">
642
+ <div class="section-header">
643
+ <h2>Grading System</h2>
644
+ <p>Deterministic and dynamic — same actions always produce the same score.</p>
645
+ </div>
646
+ <div class="grading-grid">
647
+ <div class="grade-card">
648
+ <div class="grade-weight">40%</div>
649
+ <h4>Partial Fixes</h4>
650
+ <p>Proportional to fraction of issues fixed</p>
651
+ </div>
652
+ <div class="grade-card">
653
+ <div class="grade-weight">30%</div>
654
+ <h4>Complete Bonus</h4>
655
+ <p>All-or-nothing bonus when every issue is fixed</p>
656
+ </div>
657
+ <div class="grade-card">
658
+ <div class="grade-weight">30%</div>
659
+ <h4>Efficiency</h4>
660
+ <p>Bonus for solving in minimal steps</p>
661
+ </div>
662
+ <div class="grade-card">
663
+ <div class="grade-weight">−5%</div>
664
+ <h4>Hint Penalty</h4>
665
+ <p>Per hint requested by the agent</p>
666
+ </div>
667
+ </div>
668
+ </section>
669
+
670
+ <!-- ═══ BASELINE RESULTS ═══ -->
671
+ <section class="section">
672
+ <div class="section-header">
673
+ <h2>Baseline Results</h2>
674
+ <p>Tested with <code style="color:var(--accent-cyan)">meta-llama/Llama-3.1-70B-Instruct</code> via HuggingFace router.</p>
675
+ </div>
676
+ <div class="baseline-bars">
677
+ <div class="baseline-bar">
678
+ <span class="task-name">dockerfile_syntax</span>
679
+ <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
680
+ <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
681
+ </div>
682
+ <div class="baseline-bar">
683
+ <span class="task-name">dockerfile_runtime</span>
684
+ <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
685
+ <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
686
+ </div>
687
+ <div class="baseline-bar">
688
+ <span class="task-name">workflow_syntax_structure</span>
689
+ <div class="bar-track"><div class="bar-fill" data-width="0"></div></div>
690
+ <span class="score-value" style="color:var(--accent-rose)">0.000</span>
691
+ </div>
692
+ <div class="baseline-bar">
693
+ <span class="task-name">workflow_secrets_permissions</span>
694
+ <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
695
+ <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
696
+ </div>
697
+ <div class="baseline-bar">
698
+ <span class="task-name">ci_docker_integration</span>
699
+ <div class="bar-track"><div class="bar-fill" data-width="0"></div></div>
700
+ <span class="score-value" style="color:var(--accent-rose)">0.000</span>
701
+ </div>
702
+ <div class="baseline-bar">
703
+ <span class="task-name">multi_stage_pipeline_matrix</span>
704
+ <div class="bar-track"><div class="bar-fill" data-width="28"></div></div>
705
+ <span class="score-value" style="color:var(--accent-amber)">0.283</span>
706
+ </div>
707
+ <div class="baseline-bar" style="border-color: rgba(99,102,241,0.3);">
708
+ <span class="task-name" style="font-weight:700; color:var(--text-primary);">OVERALL</span>
709
+ <div class="bar-track"><div class="bar-fill" data-width="55"></div></div>
710
+ <span class="score-value" style="color:var(--accent-indigo); font-size:1rem;">0.547</span>
711
+ </div>
712
+ </div>
713
+ </section>
714
+
715
+ <!-- ═══ API ENDPOINTS ═══ -->
716
+ <section class="section">
717
+ <div class="section-header">
718
+ <h2>API Endpoints</h2>
719
+ <p>12 endpoints for environment control, grading, and introspection.</p>
720
+ </div>
721
+ <table class="endpoint-table">
722
+ <thead>
723
+ <tr>
724
+ <th>Endpoint</th>
725
+ <th>Method</th>
726
+ <th>Description</th>
727
+ </tr>
728
+ </thead>
729
+ <tbody>
730
+ <tr>
731
+ <td><span class="endpoint-path">/health</span></td>
732
+ <td><span class="method-badge get">GET</span></td>
733
+ <td>Health check — returns <code>{"status": "healthy"}</code></td>
734
+ </tr>
735
+ <tr>
736
+ <td><span class="endpoint-path">/metadata</span></td>
737
+ <td><span class="method-badge get">GET</span></td>
738
+ <td>Environment name, description, version, tags</td>
739
+ </tr>
740
+ <tr>
741
+ <td><span class="endpoint-path">/schema</span></td>
742
+ <td><span class="method-badge get">GET</span></td>
743
+ <td>Action, observation, and state JSON schemas</td>
744
+ </tr>
745
+ <tr>
746
+ <td><span class="endpoint-path">/reset</span></td>
747
+ <td><span class="method-badge post">POST</span></td>
748
+ <td>Start a new episode (optional: task_id, scenario_id, seed)</td>
749
+ </tr>
750
+ <tr>
751
+ <td><span class="endpoint-path">/step</span></td>
752
+ <td><span class="method-badge post">POST</span></td>
753
+ <td>Take an action and receive observation + reward</td>
754
+ </tr>
755
+ <tr>
756
+ <td><span class="endpoint-path">/state</span></td>
757
+ <td><span class="method-badge get">GET</span></td>
758
+ <td>Get current observation without acting</td>
759
+ </tr>
760
+ <tr>
761
+ <td><span class="endpoint-path">/info</span></td>
762
+ <td><span class="method-badge get">GET</span></td>
763
+ <td>Task list with metadata</td>
764
+ </tr>
765
+ <tr>
766
+ <td><span class="endpoint-path">/tasks</span></td>
767
+ <td><span class="method-badge get">GET</span></td>
768
+ <td>All tasks with difficulty levels</td>
769
+ </tr>
770
+ <tr>
771
+ <td><span class="endpoint-path">/grader</span></td>
772
+ <td><span class="method-badge post">POST</span></td>
773
+ <td>Grade a trajectory (list of step dicts)</td>
774
+ </tr>
775
+ <tr>
776
+ <td><span class="endpoint-path">/baseline</span></td>
777
+ <td><span class="method-badge post">POST</span></td>
778
+ <td>Run built-in heuristic baseline</td>
779
+ </tr>
780
+ <tr>
781
+ <td><span class="endpoint-path">/mcp</span></td>
782
+ <td><span class="method-badge post">POST</span></td>
783
+ <td>JSON-RPC 2.0 MCP endpoint</td>
784
+ </tr>
785
+ <tr>
786
+ <td><span class="endpoint-path">/docs</span></td>
787
+ <td><span class="method-badge get">GET</span></td>
788
+ <td>Interactive Swagger API documentation</td>
789
+ </tr>
790
+ </tbody>
791
+ </table>
792
+ </section>
793
+
794
+ <!-- ═══ QUICK START ═══ -->
795
+ <section class="section">
796
+ <div class="section-header">
797
+ <h2>Quick Start</h2>
798
+ <p>Run a full episode in 3 commands.</p>
799
+ </div>
800
+ <div class="code-block">
801
+ <span class="comment"># 1. Start an episode</span>
802
+ <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/reset</span> \
803
+ -H <span class="flag">"Content-Type: application/json"</span> \
804
+ -d '{"task_id": "dockerfile_syntax", "scenario_id": "typo_filename"}'
805
+
806
+ <span class="comment"># 2. Fix the typo</span>
807
+ <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/step</span> \
808
+ -H <span class="flag">"Content-Type: application/json"</span> \
809
+ -d '{"action": {"action_type": "edit_file", "edits": [{"file_path": "Dockerfile", "old_content": "COPY requirments.txt .", "new_content": "COPY requirements.txt ."}]}}'
810
+
811
+ <span class="comment"># 3. Submit</span>
812
+ <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/step</span> \
813
+ -H <span class="flag">"Content-Type: application/json"</span> \
814
+ -d '{"action": {"action_type": "submit"}}'
815
+ </div>
816
+ </section>
817
+
818
+ <!-- ═══ FOOTER ═══ -->
819
+ <footer class="footer">
820
+ <p>
821
+ Built for the <strong>OpenEnv Hackathon</strong> by
822
+ <a href="https://github.com/melohub-xbit/GitHubActions-Docker-OpenEnv" target="_blank">Scaler School of Technology</a>
823
+ &middot; Partners: Meta, HuggingFace, PyTorch
824
+ </p>
825
+ <p style="margin-top: 8px;">MIT License</p>
826
+ </footer>
827
+
828
+ </div>
829
+
830
+ <script>
831
+ // Animate baseline score bars on scroll
832
+ const observer = new IntersectionObserver((entries) => {
833
+ entries.forEach(entry => {
834
+ if (entry.isIntersecting) {
835
+ entry.target.querySelectorAll('.bar-fill').forEach(bar => {
836
+ bar.style.width = bar.dataset.width + '%';
837
+ });
838
+ observer.unobserve(entry.target);
839
+ }
840
+ });
841
+ }, { threshold: 0.3 });
842
+
843
+ const bars = document.querySelector('.baseline-bars');
844
+ if (bars) {
845
+ bars.querySelectorAll('.bar-fill').forEach(b => b.style.width = '0%');
846
+ observer.observe(bars);
847
+ }
848
+
849
+ // Staggered card entrance animation
850
+ const cards = document.querySelectorAll('.task-card, .grade-card, .flow-step');
851
+ const cardObserver = new IntersectionObserver((entries) => {
852
+ entries.forEach((entry, i) => {
853
+ if (entry.isIntersecting) {
854
+ entry.target.style.animation = `fadeInUp 0.5s ease-out ${i * 0.06}s both`;
855
+ cardObserver.unobserve(entry.target);
856
+ }
857
+ });
858
+ }, { threshold: 0.15 });
859
+ cards.forEach(c => cardObserver.observe(c));
860
+ </script>
861
+
862
+ </body>
863
+ </html>
tests/test_endpoints.py CHANGED
@@ -7,12 +7,18 @@ from server.main import app
7
  client = TestClient(app)
8
 
9
 
10
- def test_root_health():
11
  response = client.get("/")
12
  assert response.status_code == 200
 
 
 
 
 
 
 
13
  data = response.json()
14
  assert data["status"] == "healthy"
15
- assert data["environment"] == "cicd-docker-env"
16
 
17
 
18
  def test_info_returns_all_tasks():
 
7
  client = TestClient(app)
8
 
9
 
10
+ def test_root_landing_page():
11
  response = client.get("/")
12
  assert response.status_code == 200
13
+ assert "text/html" in response.headers.get("content-type", "")
14
+ assert "CI/CD" in response.text
15
+
16
+
17
+ def test_health_endpoint():
18
+ response = client.get("/health")
19
+ assert response.status_code == 200
20
  data = response.json()
21
  assert data["status"] == "healthy"
 
22
 
23
 
24
  def test_info_returns_all_tasks():