Krishna1107 committed on
Commit
bb97738
·
1 Parent(s): 0a4f56a

display page corrected

Browse files
Files changed (1) hide show
  1. server/static/index.html +192 -586
server/static/index.html CHANGED
@@ -4,7 +4,7 @@
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>Cloud-Native DevOps Debug Environment</title>
7
- <meta name="description" content="OpenEnv environment where AI agents learn to debug broken GitHub Actions workflows, Dockerfiles, and Kubernetes manifests.">
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
10
  <style>
@@ -39,11 +39,8 @@
39
  overflow-x: hidden;
40
  }
41
 
42
- /* ── Animated Background ── */
43
  .bg-grid {
44
- position: fixed;
45
- inset: 0;
46
- z-index: 0;
47
  background-image:
48
  radial-gradient(circle at 20% 30%, rgba(99,102,241,0.08) 0%, transparent 50%),
49
  radial-gradient(circle at 80% 70%, rgba(34,211,238,0.06) 0%, transparent 50%),
@@ -51,438 +48,127 @@
51
  animation: bgPulse 12s ease-in-out infinite alternate;
52
  }
53
  .bg-grid::after {
54
- content: '';
55
- position: absolute;
56
- inset: 0;
57
  background-image: linear-gradient(rgba(99,102,241,0.03) 1px, transparent 1px),
58
  linear-gradient(90deg, rgba(99,102,241,0.03) 1px, transparent 1px);
59
  background-size: 60px 60px;
60
  }
61
- @keyframes bgPulse {
62
- 0% { opacity: 1; }
63
- 100% { opacity: 0.6; transform: scale(1.02); }
64
- }
65
 
66
- /* ── Main Container ── */
67
- .container {
68
- position: relative;
69
- z-index: 1;
70
- max-width: 1100px;
71
- margin: 0 auto;
72
- padding: 0 24px;
73
- }
74
 
75
- /* ── Hero Section ── */
76
- .hero {
77
- text-align: center;
78
- padding: 80px 0 60px;
79
- }
80
  .hero-badge {
81
- display: inline-flex;
82
- align-items: center;
83
- gap: 8px;
84
- padding: 6px 16px;
85
- border-radius: 100px;
86
- background: rgba(99,102,241,0.1);
87
- border: 1px solid rgba(99,102,241,0.25);
88
- font-size: 0.8rem;
89
- font-weight: 500;
90
- color: var(--accent-indigo);
91
- letter-spacing: 0.5px;
92
- margin-bottom: 28px;
93
  animation: fadeInDown 0.6s ease-out;
94
  }
95
- .hero-badge .dot {
96
- width: 7px; height: 7px;
97
- border-radius: 50%;
98
- background: var(--accent-emerald);
99
- animation: pulse 2s ease-in-out infinite;
100
- }
101
- @keyframes pulse {
102
- 0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(52,211,153,0.5); }
103
- 50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(52,211,153,0); }
104
- }
105
- @keyframes fadeInDown {
106
- from { opacity: 0; transform: translateY(-16px); }
107
- to { opacity: 1; transform: translateY(0); }
108
- }
109
- @keyframes fadeInUp {
110
- from { opacity: 0; transform: translateY(20px); }
111
- to { opacity: 1; transform: translateY(0); }
112
- }
113
 
114
- .hero h1 {
115
- font-size: clamp(2.2rem, 5vw, 3.4rem);
116
- font-weight: 800;
117
- line-height: 1.15;
118
- letter-spacing: -1.5px;
119
- margin-bottom: 20px;
120
- animation: fadeInUp 0.7s ease-out 0.1s both;
121
- }
122
- .hero h1 .gradient-text {
123
- background: var(--gradient-primary);
124
- -webkit-background-clip: text;
125
- -webkit-text-fill-color: transparent;
126
- background-clip: text;
127
- }
128
- .hero p {
129
- font-size: 1.15rem;
130
- color: var(--text-secondary);
131
- max-width: 650px;
132
- margin: 0 auto 36px;
133
- animation: fadeInUp 0.7s ease-out 0.2s both;
134
- }
135
 
136
- .hero-actions {
137
- display: flex;
138
- gap: 14px;
139
- justify-content: center;
140
- flex-wrap: wrap;
141
- animation: fadeInUp 0.7s ease-out 0.3s both;
142
- }
143
- .btn {
144
- display: inline-flex;
145
- align-items: center;
146
- gap: 8px;
147
- padding: 12px 26px;
148
- border-radius: 12px;
149
- font-family: inherit;
150
- font-size: 0.9rem;
151
- font-weight: 600;
152
- text-decoration: none;
153
- cursor: pointer;
154
- border: none;
155
- transition: all 0.25s ease;
156
- }
157
- .btn-primary {
158
- background: var(--gradient-primary);
159
- color: #0a0e1a;
160
- }
161
  .btn-primary:hover { transform: translateY(-2px); box-shadow: 0 8px 30px rgba(99,102,241,0.3); }
162
- .btn-secondary {
163
- background: rgba(99,102,241,0.1);
164
- border: 1px solid rgba(99,102,241,0.25);
165
- color: var(--accent-indigo);
166
- }
167
  .btn-secondary:hover { background: rgba(99,102,241,0.18); transform: translateY(-2px); }
168
 
169
- /* ── Stats Strip ── */
170
- .stats {
171
- display: grid;
172
- grid-template-columns: repeat(4, 1fr);
173
- gap: 16px;
174
- margin: 0 0 64px;
175
- animation: fadeInUp 0.7s ease-out 0.4s both;
176
- }
177
- .stat-card {
178
- text-align: center;
179
- padding: 24px 16px;
180
- border-radius: 16px;
181
- background: var(--bg-card);
182
- border: 1px solid var(--border-card);
183
- backdrop-filter: blur(12px);
184
- transition: border-color 0.3s;
185
- }
186
  .stat-card:hover { border-color: rgba(99,102,241,0.35); }
187
- .stat-number {
188
- font-size: 2rem;
189
- font-weight: 800;
190
- letter-spacing: -1px;
191
- background: var(--gradient-primary);
192
- -webkit-background-clip: text;
193
- -webkit-text-fill-color: transparent;
194
- background-clip: text;
195
- }
196
- .stat-label {
197
- font-size: 0.82rem;
198
- color: var(--text-muted);
199
- margin-top: 4px;
200
- font-weight: 500;
201
- }
202
 
203
- /* ── Section Headers ── */
204
  .section { margin-bottom: 64px; }
205
- .section-header {
206
- margin-bottom: 28px;
207
- }
208
- .section-header h2 {
209
- font-size: 1.6rem;
210
- font-weight: 700;
211
- letter-spacing: -0.5px;
212
- margin-bottom: 8px;
213
- }
214
- .section-header p {
215
- color: var(--text-secondary);
216
- font-size: 0.95rem;
217
- }
218
-
219
- /* ── Task Cards ── */
220
- .task-grid {
221
- display: grid;
222
- grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
223
- gap: 18px;
224
- }
225
- .task-card {
226
- padding: 24px;
227
- border-radius: 16px;
228
- background: var(--bg-card);
229
- border: 1px solid var(--border-card);
230
- backdrop-filter: blur(12px);
231
- transition: all 0.3s ease;
232
- position: relative;
233
- overflow: hidden;
234
- }
235
- .task-card::before {
236
- content: '';
237
- position: absolute;
238
- top: 0; left: 0; right: 0;
239
- height: 3px;
240
- border-radius: 16px 16px 0 0;
241
- }
242
- .task-card:hover {
243
- border-color: rgba(99,102,241,0.35);
244
- transform: translateY(-4px);
245
- box-shadow: 0 12px 40px rgba(0,0,0,0.3);
246
- }
247
- .task-card.easy::before { background: linear-gradient(90deg, #34d399, #22d3ee); }
248
- .task-card.medium::before { background: linear-gradient(90deg, #fbbf24, #f97316); }
249
- .task-card.hard::before { background: linear-gradient(90deg, #fb7185, #a78bfa); }
250
-
251
- .task-header {
252
- display: flex;
253
- align-items: flex-start;
254
- justify-content: space-between;
255
- margin-bottom: 12px;
256
- }
257
- .task-id {
258
- font-family: 'JetBrains Mono', monospace;
259
- font-size: 0.78rem;
260
- color: var(--accent-indigo);
261
- background: rgba(99,102,241,0.1);
262
- padding: 3px 10px;
263
- border-radius: 6px;
264
- }
265
- .difficulty-badge {
266
- font-size: 0.72rem;
267
- font-weight: 600;
268
- padding: 3px 10px;
269
- border-radius: 100px;
270
- text-transform: uppercase;
271
- letter-spacing: 0.5px;
272
- }
273
- .difficulty-badge.easy { background: rgba(52,211,153,0.12); color: #34d399; }
274
- .difficulty-badge.medium { background: rgba(251,191,36,0.12); color: #fbbf24; }
275
  .difficulty-badge.medium-hard { background: rgba(249,115,22,0.12); color: #f97316; }
276
- .difficulty-badge.hard { background: rgba(251,113,133,0.12); color: #fb7185; }
277
-
278
- .task-card h3 {
279
- font-size: 1.05rem;
280
- font-weight: 600;
281
- margin-bottom: 8px;
282
- }
283
- .task-card p {
284
- font-size: 0.88rem;
285
- color: var(--text-secondary);
286
- line-height: 1.6;
287
- }
288
- .task-scenarios {
289
- margin-top: 14px;
290
- font-size: 0.78rem;
291
- color: var(--text-muted);
292
- font-weight: 500;
293
- }
294
 
295
- /* ── Endpoint Table ── */
296
- .endpoint-table {
297
- width: 100%;
298
- border-collapse: collapse;
299
- border-radius: 16px;
300
- overflow: hidden;
301
- background: var(--bg-card);
302
- border: 1px solid var(--border-card);
303
- backdrop-filter: blur(12px);
304
- }
305
- .endpoint-table th {
306
- text-align: left;
307
- padding: 14px 20px;
308
- font-size: 0.78rem;
309
- font-weight: 600;
310
- color: var(--text-muted);
311
- text-transform: uppercase;
312
- letter-spacing: 0.8px;
313
- border-bottom: 1px solid var(--border-card);
314
- background: rgba(99,102,241,0.04);
315
- }
316
- .endpoint-table td {
317
- padding: 13px 20px;
318
- font-size: 0.88rem;
319
- border-bottom: 1px solid rgba(99,102,241,0.06);
320
- color: var(--text-secondary);
321
- }
322
- .endpoint-table tr:last-child td { border-bottom: none; }
323
- .endpoint-table tr:hover td { background: rgba(99,102,241,0.03); }
324
- .endpoint-path {
325
- font-family: 'JetBrains Mono', monospace;
326
- font-size: 0.84rem;
327
- color: var(--accent-cyan);
328
- }
329
- .method-badge {
330
- font-family: 'JetBrains Mono', monospace;
331
- font-size: 0.72rem;
332
- font-weight: 600;
333
- padding: 3px 8px;
334
- border-radius: 5px;
335
- text-transform: uppercase;
336
- }
337
- .method-badge.get { background: rgba(52,211,153,0.12); color: #34d399; }
338
- .method-badge.post { background: rgba(96,165,250,0.12); color: #60a5fa; }
339
 
340
- /* ── How It Works Flow ── */
341
- .flow-steps {
342
- display: grid;
343
- grid-template-columns: repeat(3, 1fr);
344
- gap: 18px;
345
- }
346
- .flow-step {
347
- padding: 28px 24px;
348
- border-radius: 16px;
349
- background: var(--bg-card);
350
- border: 1px solid var(--border-card);
351
- backdrop-filter: blur(12px);
352
- text-align: center;
353
- position: relative;
354
- transition: all 0.3s ease;
355
- }
356
  .flow-step:hover { border-color: rgba(99,102,241,0.3); transform: translateY(-3px); }
357
- .flow-icon {
358
- font-size: 2rem;
359
- margin-bottom: 14px;
360
- display: block;
361
- }
362
- .flow-step h3 {
363
- font-size: 1rem;
364
- font-weight: 700;
365
- margin-bottom: 10px;
366
- }
367
- .flow-step p {
368
- font-size: 0.85rem;
369
- color: var(--text-secondary);
370
- line-height: 1.6;
371
- }
372
- .flow-arrow {
373
- display: none;
374
- }
375
-
376
- /* ── Code Block ── */
377
- .code-block {
378
- background: rgba(0,0,0,0.4);
379
- border: 1px solid var(--border-card);
380
- border-radius: 14px;
381
- padding: 22px 26px;
382
- font-family: 'JetBrains Mono', monospace;
383
- font-size: 0.82rem;
384
- line-height: 1.8;
385
- color: var(--text-secondary);
386
- overflow-x: auto;
387
- position: relative;
388
- }
389
- .code-block .comment { color: var(--text-muted); }
390
- .code-block .cmd { color: var(--accent-cyan); }
391
- .code-block .flag { color: var(--accent-amber); }
392
- .code-block .url { color: var(--accent-indigo); }
393
 
394
- /* ── Grading Section ── */
395
- .grading-grid {
396
- display: grid;
397
- grid-template-columns: repeat(auto-fill, minmax(230px, 1fr));
398
- gap: 16px;
399
- }
400
- .grade-card {
401
- padding: 22px;
402
- border-radius: 14px;
403
- background: var(--bg-card);
404
- border: 1px solid var(--border-card);
405
- text-align: center;
406
- transition: all 0.3s ease;
407
- }
408
  .grade-card:hover { border-color: rgba(99,102,241,0.3); }
409
- .grade-weight {
410
- font-size: 1.8rem;
411
- font-weight: 800;
412
- margin-bottom: 6px;
413
- }
414
  .grade-card:nth-child(1) .grade-weight { color: var(--accent-emerald); }
415
  .grade-card:nth-child(2) .grade-weight { color: var(--accent-blue); }
416
  .grade-card:nth-child(3) .grade-weight { color: var(--accent-amber); }
417
- .grade-card:nth-child(4) .grade-weight { color: var(--accent-rose); }
 
418
  .grade-card h4 { font-size: 0.9rem; margin-bottom: 6px; }
419
  .grade-card p { font-size: 0.8rem; color: var(--text-muted); }
420
 
421
- /* ── Baseline Scores ── */
422
- .baseline-bar {
423
- display: flex;
424
- align-items: center;
425
- gap: 14px;
426
- padding: 14px 20px;
427
- border-radius: 12px;
428
- background: var(--bg-card);
429
- border: 1px solid var(--border-card);
430
- margin-bottom: 10px;
431
- transition: all 0.3s;
432
- }
433
- .baseline-bar:hover { border-color: rgba(99,102,241,0.3); }
434
- .baseline-bar .task-name {
435
- flex: 0 0 260px;
436
- font-size: 0.85rem;
437
- font-family: 'JetBrains Mono', monospace;
438
- color: var(--text-secondary);
439
- }
440
- .baseline-bar .bar-track {
441
- flex: 1;
442
- height: 8px;
443
- border-radius: 8px;
444
- background: rgba(99,102,241,0.08);
445
- overflow: hidden;
446
- }
447
- .baseline-bar .bar-fill {
448
- height: 100%;
449
- border-radius: 8px;
450
- background: var(--gradient-primary);
451
- transition: width 1.5s ease-out;
452
- }
453
- .baseline-bar .score-value {
454
- flex: 0 0 60px;
455
- text-align: right;
456
- font-weight: 700;
457
- font-size: 0.9rem;
458
- font-family: 'JetBrains Mono', monospace;
459
- }
460
 
461
- /* ── Footer ── */
462
- .footer {
463
- text-align: center;
464
- padding: 48px 0 40px;
465
- border-top: 1px solid var(--border-card);
466
- margin-top: 40px;
467
- }
468
- .footer p {
469
- font-size: 0.82rem;
470
- color: var(--text-muted);
471
- }
472
- .footer a {
473
- color: var(--accent-indigo);
474
- text-decoration: none;
475
- }
476
  .footer a:hover { text-decoration: underline; }
477
 
478
- /* ── Responsive ── */
479
  @media (max-width: 768px) {
480
  .stats { grid-template-columns: repeat(2, 1fr); }
481
  .flow-steps { grid-template-columns: 1fr; }
482
  .task-grid { grid-template-columns: 1fr; }
483
  .grading-grid { grid-template-columns: repeat(2, 1fr); }
484
- .baseline-bar .task-name { flex: 0 0 160px; font-size: 0.75rem; }
485
- .endpoint-table { font-size: 0.8rem; }
486
  }
487
  @media (max-width: 480px) {
488
  .stats { grid-template-columns: 1fr 1fr; gap: 10px; }
@@ -497,7 +183,6 @@
497
 
498
  <div class="container">
499
 
500
- <!-- ═══ HERO ═══ -->
501
  <section class="hero">
502
  <div class="hero-badge">
503
  <span class="dot"></span>
@@ -508,9 +193,9 @@
508
  Debug Environment
509
  </h1>
510
  <p>
511
- An OpenEnv-compatible environment where AI agents learn to debug broken
512
- GitHub Actions workflows and Dockerfiles. Built for the OpenEnv Hackathon
513
- by Scaler School of Technology.
514
  </p>
515
  <div class="hero-actions">
516
  <a href="/info" class="btn btn-primary">
@@ -528,56 +213,56 @@
528
  </div>
529
  </section>
530
 
531
- <!-- ═══ STATS ═══ -->
532
  <div class="stats">
533
  <div class="stat-card">
534
- <div class="stat-number">6</div>
535
  <div class="stat-label">Task Categories</div>
536
  </div>
537
  <div class="stat-card">
538
- <div class="stat-number">30</div>
539
  <div class="stat-label">Unique Scenarios</div>
540
  </div>
541
  <div class="stat-card">
542
- <div class="stat-number">12</div>
543
- <div class="stat-label">API Endpoints</div>
544
  </div>
545
  <div class="stat-card">
546
- <div class="stat-number">0.547</div>
547
- <div class="stat-label">Baseline Score (Llama 70B)</div>
548
  </div>
549
  </div>
550
 
551
- <!-- ═══ HOW IT WORKS ═══ -->
552
  <section class="section">
553
  <div class="section-header">
554
  <h2>How It Works</h2>
555
- <p>Three-phase loop: receive broken configs, fix them, get graded.</p>
556
  </div>
557
  <div class="flow-steps">
558
  <div class="flow-step">
559
- <span class="flow-icon">📥</span>
560
  <h3>1. Reset</h3>
561
- <p>Agent receives broken Dockerfile or GitHub Actions YAML, error messages, and available secrets.</p>
562
  </div>
563
  <div class="flow-step">
564
- <span class="flow-icon">🔧</span>
565
- <h3>2. Observe → Act</h3>
566
- <p>Read errors, analyze files, edit content, replace lines, or request hints. Up to 10 steps per episode.</p>
567
  </div>
568
  <div class="flow-step">
569
- <span class="flow-icon">📊</span>
570
  <h3>3. Grade</h3>
571
- <p>Deterministic scoring based on issues fixed, efficiency, and hint usage. Score range: 0.0–1.0.</p>
572
  </div>
573
  </div>
574
  </section>
575
 
576
- <!-- ═══ TASKS ═══ -->
577
  <section class="section">
578
  <div class="section-header">
579
- <h2>The 6 Tasks</h2>
580
- <p>30 scenarios across 6 categories with clear difficulty progression.</p>
581
  </div>
582
  <div class="task-grid">
583
  <div class="task-card easy">
@@ -586,7 +271,7 @@
586
  <span class="difficulty-badge easy">Easy</span>
587
  </div>
588
  <h3>Dockerfile Syntax Errors</h3>
589
- <p>Simple typos and instruction errors that break <code>docker build</code> — misspelled filenames, invalid base images, broken line continuations.</p>
590
  <div class="task-scenarios">5 scenarios</div>
591
  </div>
592
  <div class="task-card medium">
@@ -595,7 +280,7 @@
595
  <span class="difficulty-badge medium">Medium</span>
596
  </div>
597
  <h3>Dockerfile Runtime Errors</h3>
598
- <p>Dockerfile builds but the container crashes at runtime — missing WORKDIR, CMD/ENTRYPOINT conflicts, permission issues.</p>
599
  <div class="task-scenarios">5 scenarios</div>
600
  </div>
601
  <div class="task-card easy">
@@ -604,7 +289,7 @@
604
  <span class="difficulty-badge easy">Easy</span>
605
  </div>
606
  <h3>Workflow Syntax &amp; Structure</h3>
607
- <p>GitHub Actions YAML with structural problems — missing <code>runs-on</code>, invalid triggers, steps without actions.</p>
608
  <div class="task-scenarios">5 scenarios</div>
609
  </div>
610
  <div class="task-card medium">
@@ -613,7 +298,7 @@
613
  <span class="difficulty-badge medium">Medium</span>
614
  </div>
615
  <h3>Secrets &amp; Permissions</h3>
616
- <p>Secrets exist but aren't wired correctly — missing <code>env:</code> blocks, wrong syntax, missing token permissions.</p>
617
  <div class="task-scenarios">5 scenarios</div>
618
  </div>
619
  <div class="task-card medium">
@@ -622,7 +307,7 @@
622
  <span class="difficulty-badge medium-hard">Medium-Hard</span>
623
  </div>
624
  <h3>CI + Docker Integration</h3>
625
- <p>Workflow and Dockerfile interact — fixing one alone isn't enough. Build context mismatches, missing login steps.</p>
626
  <div class="task-scenarios">5 scenarios</div>
627
  </div>
628
  <div class="task-card hard">
@@ -631,191 +316,131 @@
631
  <span class="difficulty-badge hard">Hard</span>
632
  </div>
633
  <h3>Multi-Stage &amp; Matrix Pipelines</h3>
634
- <p>Complex pipelines with 2-3 interacting bugs across multiple files. Artifact mismatches, matrix failures, cross-job dependencies.</p>
 
 
 
 
 
 
 
 
 
635
  <div class="task-scenarios">5 scenarios</div>
636
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
  </div>
638
  </section>
639
 
640
- <!-- ═══ GRADING ═══ -->
641
  <section class="section">
642
  <div class="section-header">
643
- <h2>Grading System</h2>
644
- <p>Deterministic and dynamic — same actions always produce the same score.</p>
645
  </div>
646
  <div class="grading-grid">
647
  <div class="grade-card">
648
- <div class="grade-weight">40%</div>
649
  <h4>Partial Fixes</h4>
650
- <p>Proportional to fraction of issues fixed</p>
651
  </div>
652
  <div class="grade-card">
653
- <div class="grade-weight">30%</div>
654
  <h4>Complete Bonus</h4>
655
- <p>All-or-nothing bonus when every issue is fixed</p>
656
  </div>
657
  <div class="grade-card">
658
- <div class="grade-weight">30%</div>
659
  <h4>Efficiency</h4>
660
- <p>Bonus for solving in minimal steps</p>
661
  </div>
662
  <div class="grade-card">
663
- <div class="grade-weight">−5%</div>
664
- <h4>Hint Penalty</h4>
665
- <p>Per hint requested by the agent</p>
666
- </div>
667
- </div>
668
- </section>
669
-
670
- <!-- ═══ BASELINE RESULTS ═══ -->
671
- <section class="section">
672
- <div class="section-header">
673
- <h2>Baseline Results</h2>
674
- <p>Tested with <code style="color:var(--accent-cyan)">meta-llama/Llama-3.1-70B-Instruct</code> via HuggingFace router.</p>
675
- </div>
676
- <div class="baseline-bars">
677
- <div class="baseline-bar">
678
- <span class="task-name">dockerfile_syntax</span>
679
- <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
680
- <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
681
  </div>
682
- <div class="baseline-bar">
683
- <span class="task-name">dockerfile_runtime</span>
684
- <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
685
- <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
686
- </div>
687
- <div class="baseline-bar">
688
- <span class="task-name">workflow_syntax_structure</span>
689
- <div class="bar-track"><div class="bar-fill" data-width="0"></div></div>
690
- <span class="score-value" style="color:var(--accent-rose)">0.000</span>
691
- </div>
692
- <div class="baseline-bar">
693
- <span class="task-name">workflow_secrets_permissions</span>
694
- <div class="bar-track"><div class="bar-fill" data-width="100"></div></div>
695
- <span class="score-value" style="color:var(--accent-emerald)">1.000</span>
696
- </div>
697
- <div class="baseline-bar">
698
- <span class="task-name">ci_docker_integration</span>
699
- <div class="bar-track"><div class="bar-fill" data-width="0"></div></div>
700
- <span class="score-value" style="color:var(--accent-rose)">0.000</span>
701
- </div>
702
- <div class="baseline-bar">
703
- <span class="task-name">multi_stage_pipeline_matrix</span>
704
- <div class="bar-track"><div class="bar-fill" data-width="28"></div></div>
705
- <span class="score-value" style="color:var(--accent-amber)">0.283</span>
706
- </div>
707
- <div class="baseline-bar" style="border-color: rgba(99,102,241,0.3);">
708
- <span class="task-name" style="font-weight:700; color:var(--text-primary);">OVERALL</span>
709
- <div class="bar-track"><div class="bar-fill" data-width="55"></div></div>
710
- <span class="score-value" style="color:var(--accent-indigo); font-size:1rem;">0.547</span>
711
  </div>
712
  </div>
713
  </section>
714
 
715
- <!-- ═══ API ENDPOINTS ═══ -->
716
  <section class="section">
717
  <div class="section-header">
718
  <h2>API Endpoints</h2>
719
- <p>12 endpoints for environment control, grading, and introspection.</p>
720
  </div>
721
  <table class="endpoint-table">
722
  <thead>
723
- <tr>
724
- <th>Endpoint</th>
725
- <th>Method</th>
726
- <th>Description</th>
727
- </tr>
728
  </thead>
729
  <tbody>
730
- <tr>
731
- <td><span class="endpoint-path">/health</span></td>
732
- <td><span class="method-badge get">GET</span></td>
733
- <td>Health check — returns <code>{"status": "healthy"}</code></td>
734
- </tr>
735
- <tr>
736
- <td><span class="endpoint-path">/metadata</span></td>
737
- <td><span class="method-badge get">GET</span></td>
738
- <td>Environment name, description, version, tags</td>
739
- </tr>
740
- <tr>
741
- <td><span class="endpoint-path">/schema</span></td>
742
- <td><span class="method-badge get">GET</span></td>
743
- <td>Action, observation, and state JSON schemas</td>
744
- </tr>
745
- <tr>
746
- <td><span class="endpoint-path">/reset</span></td>
747
- <td><span class="method-badge post">POST</span></td>
748
- <td>Start a new episode (optional: task_id, scenario_id, seed)</td>
749
- </tr>
750
- <tr>
751
- <td><span class="endpoint-path">/step</span></td>
752
- <td><span class="method-badge post">POST</span></td>
753
- <td>Take an action and receive observation + reward</td>
754
- </tr>
755
- <tr>
756
- <td><span class="endpoint-path">/state</span></td>
757
- <td><span class="method-badge get">GET</span></td>
758
- <td>Get current observation without acting</td>
759
- </tr>
760
- <tr>
761
- <td><span class="endpoint-path">/info</span></td>
762
- <td><span class="method-badge get">GET</span></td>
763
- <td>Task list with metadata</td>
764
- </tr>
765
- <tr>
766
- <td><span class="endpoint-path">/tasks</span></td>
767
- <td><span class="method-badge get">GET</span></td>
768
- <td>All tasks with difficulty levels</td>
769
- </tr>
770
- <tr>
771
- <td><span class="endpoint-path">/grader</span></td>
772
- <td><span class="method-badge post">POST</span></td>
773
- <td>Grade a trajectory (list of step dicts)</td>
774
- </tr>
775
- <tr>
776
- <td><span class="endpoint-path">/baseline</span></td>
777
- <td><span class="method-badge post">POST</span></td>
778
- <td>Run built-in heuristic baseline</td>
779
- </tr>
780
- <tr>
781
- <td><span class="endpoint-path">/mcp</span></td>
782
- <td><span class="method-badge post">POST</span></td>
783
- <td>JSON-RPC 2.0 MCP endpoint</td>
784
- </tr>
785
- <tr>
786
- <td><span class="endpoint-path">/docs</span></td>
787
- <td><span class="method-badge get">GET</span></td>
788
- <td>Interactive Swagger API documentation</td>
789
- </tr>
790
  </tbody>
791
  </table>
792
  </section>
793
 
794
- <!-- ═══ QUICK START ═══ -->
795
  <section class="section">
796
  <div class="section-header">
797
- <h2>Quick Start</h2>
798
- <p>Run a full episode in 3 commands.</p>
799
  </div>
800
  <div class="code-block">
801
- <span class="comment"># 1. Start an episode</span>
802
  <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/reset</span> \
803
  -H <span class="flag">"Content-Type: application/json"</span> \
804
- -d '{"task_id": "dockerfile_syntax", "scenario_id": "typo_filename"}'
805
 
806
- <span class="comment"># 2. Fix the typo</span>
807
  <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/step</span> \
808
  -H <span class="flag">"Content-Type: application/json"</span> \
809
- -d '{"action": {"action_type": "edit_file", "edits": [{"file_path": "Dockerfile", "old_content": "COPY requirments.txt .", "new_content": "COPY requirements.txt ."}]}}'
810
 
811
- <span class="comment"># 3. Submit</span>
812
- <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/step</span> \
813
- -H <span class="flag">"Content-Type: application/json"</span> \
814
- -d '{"action": {"action_type": "submit"}}'
815
  </div>
816
  </section>
817
 
818
- <!-- ═══ FOOTER ═══ -->
819
  <footer class="footer">
820
  <p>
821
  Built for the <strong>OpenEnv Hackathon</strong> by
@@ -828,35 +453,16 @@
828
  </div>
829
 
830
  <script>
831
- // Animate baseline score bars on scroll
832
- const observer = new IntersectionObserver((entries) => {
833
- entries.forEach(entry => {
834
- if (entry.isIntersecting) {
835
- entry.target.querySelectorAll('.bar-fill').forEach(bar => {
836
- bar.style.width = bar.dataset.width + '%';
837
- });
838
- observer.unobserve(entry.target);
839
- }
840
- });
841
- }, { threshold: 0.3 });
842
-
843
- const bars = document.querySelector('.baseline-bars');
844
- if (bars) {
845
- bars.querySelectorAll('.bar-fill').forEach(b => b.style.width = '0%');
846
- observer.observe(bars);
847
- }
848
-
849
- // Staggered card entrance animation
850
  const cards = document.querySelectorAll('.task-card, .grade-card, .flow-step');
851
- const cardObserver = new IntersectionObserver((entries) => {
852
  entries.forEach((entry, i) => {
853
  if (entry.isIntersecting) {
854
  entry.target.style.animation = `fadeInUp 0.5s ease-out ${i * 0.06}s both`;
855
- cardObserver.unobserve(entry.target);
856
  }
857
  });
858
  }, { threshold: 0.15 });
859
- cards.forEach(c => cardObserver.observe(c));
860
  </script>
861
 
862
  </body>
 
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>Cloud-Native DevOps Debug Environment</title>
7
+ <meta name="description" content="Train AI agents to debug broken GitHub Actions workflows, Dockerfiles, and Kubernetes manifests. 10 tasks, 50 scenarios, difficulty-aware grading.">
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
10
  <style>
 
39
  overflow-x: hidden;
40
  }
41
 
 
42
  .bg-grid {
43
+ position: fixed; inset: 0; z-index: 0;
 
 
44
  background-image:
45
  radial-gradient(circle at 20% 30%, rgba(99,102,241,0.08) 0%, transparent 50%),
46
  radial-gradient(circle at 80% 70%, rgba(34,211,238,0.06) 0%, transparent 50%),
 
48
  animation: bgPulse 12s ease-in-out infinite alternate;
49
  }
50
  .bg-grid::after {
51
+ content: ''; position: absolute; inset: 0;
 
 
52
  background-image: linear-gradient(rgba(99,102,241,0.03) 1px, transparent 1px),
53
  linear-gradient(90deg, rgba(99,102,241,0.03) 1px, transparent 1px);
54
  background-size: 60px 60px;
55
  }
56
+ @keyframes bgPulse { 0% { opacity: 1; } 100% { opacity: 0.6; transform: scale(1.02); } }
 
 
 
57
 
58
+ .container { position: relative; z-index: 1; max-width: 1100px; margin: 0 auto; padding: 0 24px; }
 
 
 
 
 
 
 
59
 
60
+ /* Hero */
61
+ .hero { text-align: center; padding: 80px 0 60px; }
 
 
 
62
  .hero-badge {
63
+ display: inline-flex; align-items: center; gap: 8px;
64
+ padding: 6px 16px; border-radius: 100px;
65
+ background: rgba(99,102,241,0.1); border: 1px solid rgba(99,102,241,0.25);
66
+ font-size: 0.8rem; font-weight: 500; color: var(--accent-indigo);
67
+ letter-spacing: 0.5px; margin-bottom: 28px;
 
 
 
 
 
 
 
68
  animation: fadeInDown 0.6s ease-out;
69
  }
70
+ .hero-badge .dot { width: 7px; height: 7px; border-radius: 50%; background: var(--accent-emerald); animation: pulse 2s ease-in-out infinite; }
71
+ @keyframes pulse { 0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(52,211,153,0.5); } 50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(52,211,153,0); } }
72
+ @keyframes fadeInDown { from { opacity: 0; transform: translateY(-16px); } to { opacity: 1; transform: translateY(0); } }
73
+ @keyframes fadeInUp { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ .hero h1 { font-size: clamp(2.2rem, 5vw, 3.4rem); font-weight: 800; line-height: 1.15; letter-spacing: -1.5px; margin-bottom: 20px; animation: fadeInUp 0.7s ease-out 0.1s both; }
76
+ .hero h1 .gradient-text { background: var(--gradient-primary); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; }
77
+ .hero p { font-size: 1.15rem; color: var(--text-secondary); max-width: 680px; margin: 0 auto 36px; animation: fadeInUp 0.7s ease-out 0.2s both; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ .hero-actions { display: flex; gap: 14px; justify-content: center; flex-wrap: wrap; animation: fadeInUp 0.7s ease-out 0.3s both; }
80
+ .btn { display: inline-flex; align-items: center; gap: 8px; padding: 12px 26px; border-radius: 12px; font-family: inherit; font-size: 0.9rem; font-weight: 600; text-decoration: none; cursor: pointer; border: none; transition: all 0.25s ease; }
81
+ .btn-primary { background: var(--gradient-primary); color: #0a0e1a; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  .btn-primary:hover { transform: translateY(-2px); box-shadow: 0 8px 30px rgba(99,102,241,0.3); }
83
+ .btn-secondary { background: rgba(99,102,241,0.1); border: 1px solid rgba(99,102,241,0.25); color: var(--accent-indigo); }
 
 
 
 
84
  .btn-secondary:hover { background: rgba(99,102,241,0.18); transform: translateY(-2px); }
85
 
86
+ /* Stats */
87
+ .stats { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin: 0 0 64px; animation: fadeInUp 0.7s ease-out 0.4s both; }
88
+ .stat-card { text-align: center; padding: 24px 16px; border-radius: 16px; background: var(--bg-card); border: 1px solid var(--border-card); backdrop-filter: blur(12px); transition: border-color 0.3s; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  .stat-card:hover { border-color: rgba(99,102,241,0.35); }
90
+ .stat-number { font-size: 2rem; font-weight: 800; letter-spacing: -1px; background: var(--gradient-primary); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; }
91
+ .stat-label { font-size: 0.82rem; color: var(--text-muted); margin-top: 4px; font-weight: 500; }
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ /* Section */
94
  .section { margin-bottom: 64px; }
95
+ .section-header { margin-bottom: 28px; }
96
+ .section-header h2 { font-size: 1.6rem; font-weight: 700; letter-spacing: -0.5px; margin-bottom: 8px; }
97
+ .section-header p { color: var(--text-secondary); font-size: 0.95rem; }
98
+
99
+ /* Task Cards */
100
+ .task-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); gap: 18px; }
101
+ .task-card { padding: 24px; border-radius: 16px; background: var(--bg-card); border: 1px solid var(--border-card); backdrop-filter: blur(12px); transition: all 0.3s ease; position: relative; overflow: hidden; }
102
+ .task-card::before { content: ''; position: absolute; top: 0; left: 0; right: 0; height: 3px; border-radius: 16px 16px 0 0; }
103
+ .task-card:hover { border-color: rgba(99,102,241,0.35); transform: translateY(-4px); box-shadow: 0 12px 40px rgba(0,0,0,0.3); }
104
+ .task-card.easy::before { background: linear-gradient(90deg, #34d399, #22d3ee); }
105
+ .task-card.medium::before { background: linear-gradient(90deg, #fbbf24, #f97316); }
106
+ .task-card.hard::before { background: linear-gradient(90deg, #fb7185, #a78bfa); }
107
+ .task-card.expert::before { background: linear-gradient(90deg, #a78bfa, #818cf8, #fb7185); }
108
+
109
+ .task-header { display: flex; align-items: flex-start; justify-content: space-between; margin-bottom: 12px; }
110
+ .task-id { font-family: 'JetBrains Mono', monospace; font-size: 0.78rem; color: var(--accent-indigo); background: rgba(99,102,241,0.1); padding: 3px 10px; border-radius: 6px; }
111
+ .difficulty-badge { font-size: 0.72rem; font-weight: 600; padding: 3px 10px; border-radius: 100px; text-transform: uppercase; letter-spacing: 0.5px; }
112
+ .difficulty-badge.easy { background: rgba(52,211,153,0.12); color: #34d399; }
113
+ .difficulty-badge.medium { background: rgba(251,191,36,0.12); color: #fbbf24; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  .difficulty-badge.medium-hard { background: rgba(249,115,22,0.12); color: #f97316; }
115
+ .difficulty-badge.hard { background: rgba(251,113,133,0.12); color: #fb7185; }
116
+ .difficulty-badge.expert { background: rgba(167,139,250,0.15); color: #a78bfa; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
+ .task-card h3 { font-size: 1.05rem; font-weight: 600; margin-bottom: 8px; }
119
+ .task-card p { font-size: 0.88rem; color: var(--text-secondary); line-height: 1.6; }
120
+ .task-scenarios { margin-top: 14px; font-size: 0.78rem; color: var(--text-muted); font-weight: 500; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ /* Flow Steps */
123
+ .flow-steps { display: grid; grid-template-columns: repeat(3, 1fr); gap: 18px; }
124
+ .flow-step { padding: 28px 24px; border-radius: 16px; background: var(--bg-card); border: 1px solid var(--border-card); backdrop-filter: blur(12px); text-align: center; transition: all 0.3s ease; }
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  .flow-step:hover { border-color: rgba(99,102,241,0.3); transform: translateY(-3px); }
126
+ .flow-icon { font-size: 2rem; margin-bottom: 14px; display: block; }
127
+ .flow-step h3 { font-size: 1rem; font-weight: 700; margin-bottom: 10px; }
128
+ .flow-step p { font-size: 0.85rem; color: var(--text-secondary); line-height: 1.6; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ /* Grading */
131
+ .grading-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 16px; }
132
+ .grade-card { padding: 22px; border-radius: 14px; background: var(--bg-card); border: 1px solid var(--border-card); text-align: center; transition: all 0.3s ease; }
 
 
 
 
 
 
 
 
 
 
 
133
  .grade-card:hover { border-color: rgba(99,102,241,0.3); }
134
+ .grade-weight { font-size: 1.8rem; font-weight: 800; margin-bottom: 6px; }
 
 
 
 
135
  .grade-card:nth-child(1) .grade-weight { color: var(--accent-emerald); }
136
  .grade-card:nth-child(2) .grade-weight { color: var(--accent-blue); }
137
  .grade-card:nth-child(3) .grade-weight { color: var(--accent-amber); }
138
+ .grade-card:nth-child(4) .grade-weight { color: var(--accent-purple); }
139
+ .grade-card:nth-child(5) .grade-weight { color: var(--accent-rose); }
140
  .grade-card h4 { font-size: 0.9rem; margin-bottom: 6px; }
141
  .grade-card p { font-size: 0.8rem; color: var(--text-muted); }
142
 
143
+ /* Endpoint Table */
144
+ .endpoint-table { width: 100%; border-collapse: collapse; border-radius: 16px; overflow: hidden; background: var(--bg-card); border: 1px solid var(--border-card); backdrop-filter: blur(12px); }
145
+ .endpoint-table th { text-align: left; padding: 14px 20px; font-size: 0.78rem; font-weight: 600; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.8px; border-bottom: 1px solid var(--border-card); background: rgba(99,102,241,0.04); }
146
+ .endpoint-table td { padding: 13px 20px; font-size: 0.88rem; border-bottom: 1px solid rgba(99,102,241,0.06); color: var(--text-secondary); }
147
+ .endpoint-table tr:last-child td { border-bottom: none; }
148
+ .endpoint-table tr:hover td { background: rgba(99,102,241,0.03); }
149
+ .endpoint-path { font-family: 'JetBrains Mono', monospace; font-size: 0.84rem; color: var(--accent-cyan); }
150
+ .method-badge { font-family: 'JetBrains Mono', monospace; font-size: 0.72rem; font-weight: 600; padding: 3px 8px; border-radius: 5px; text-transform: uppercase; }
151
+ .method-badge.get { background: rgba(52,211,153,0.12); color: #34d399; }
152
+ .method-badge.post { background: rgba(96,165,250,0.12); color: #60a5fa; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ /* Code Block */
155
+ .code-block { background: rgba(0,0,0,0.4); border: 1px solid var(--border-card); border-radius: 14px; padding: 22px 26px; font-family: 'JetBrains Mono', monospace; font-size: 0.82rem; line-height: 1.8; color: var(--text-secondary); overflow-x: auto; }
156
+ .code-block .comment { color: var(--text-muted); }
157
+ .code-block .cmd { color: var(--accent-cyan); }
158
+ .code-block .flag { color: var(--accent-amber); }
159
+ .code-block .url { color: var(--accent-indigo); }
160
+
161
+ /* Footer */
162
+ .footer { text-align: center; padding: 48px 0 40px; border-top: 1px solid var(--border-card); margin-top: 40px; }
163
+ .footer p { font-size: 0.82rem; color: var(--text-muted); }
164
+ .footer a { color: var(--accent-indigo); text-decoration: none; }
 
 
 
 
165
  .footer a:hover { text-decoration: underline; }
166
 
 
167
  @media (max-width: 768px) {
168
  .stats { grid-template-columns: repeat(2, 1fr); }
169
  .flow-steps { grid-template-columns: 1fr; }
170
  .task-grid { grid-template-columns: 1fr; }
171
  .grading-grid { grid-template-columns: repeat(2, 1fr); }
 
 
172
  }
173
  @media (max-width: 480px) {
174
  .stats { grid-template-columns: 1fr 1fr; gap: 10px; }
 
183
 
184
  <div class="container">
185
 
 
186
  <section class="hero">
187
  <div class="hero-badge">
188
  <span class="dot"></span>
 
193
  Debug Environment
194
  </h1>
195
  <p>
196
+ Broken Dockerfiles. Misconfigured workflows. K8s pods stuck in CrashLoopBackOff.
197
+ This environment throws real-world deployment failures at AI agents and
198
+ measures how well they can track down the root cause and fix it.
199
  </p>
200
  <div class="hero-actions">
201
  <a href="/info" class="btn btn-primary">
 
213
  </div>
214
  </section>
215
 
216
+ <!-- Stats -->
217
  <div class="stats">
218
  <div class="stat-card">
219
+ <div class="stat-number">10</div>
220
  <div class="stat-label">Task Categories</div>
221
  </div>
222
  <div class="stat-card">
223
+ <div class="stat-number">50</div>
224
  <div class="stat-label">Unique Scenarios</div>
225
  </div>
226
  <div class="stat-card">
227
+ <div class="stat-number">3</div>
228
+ <div class="stat-label">Simulators (Docker, GHA, K8s)</div>
229
  </div>
230
  <div class="stat-card">
231
+ <div class="stat-number">12</div>
232
+ <div class="stat-label">API Endpoints</div>
233
  </div>
234
  </div>
235
 
236
+ <!-- How It Works -->
237
  <section class="section">
238
  <div class="section-header">
239
  <h2>How It Works</h2>
240
+ <p>Same loop every DevOps engineer runs through, just faster.</p>
241
  </div>
242
  <div class="flow-steps">
243
  <div class="flow-step">
244
+ <span class="flow-icon">&#x1F4E5;</span>
245
  <h3>1. Reset</h3>
246
+ <p>Agent gets broken config files &mdash; a Dockerfile, a workflow YAML, some K8s manifests &mdash; along with whatever error the pipeline spit out.</p>
247
  </div>
248
  <div class="flow-step">
249
+ <span class="flow-icon">&#x1F527;</span>
250
+ <h3>2. Observe &rarr; Act</h3>
251
+ <p>Read the error, find the bug, edit the file. Could be a typo, a wrong port, a missing secret. Up to 10 steps to get it right.</p>
252
  </div>
253
  <div class="flow-step">
254
+ <span class="flow-icon">&#x1F4CA;</span>
255
  <h3>3. Grade</h3>
256
+ <p>Deterministic scoring: how many issues got fixed, how quickly, and whether hints were needed. Harder tasks are graded more generously.</p>
257
  </div>
258
  </div>
259
  </section>
260
 
261
+ <!-- Tasks -->
262
  <section class="section">
263
  <div class="section-header">
264
+ <h2>10 Tasks, 50 Scenarios</h2>
265
+ <p>From single-typo Dockerfile fixes to multi-bug pipeline debugging across GHA + Docker + K8s.</p>
266
  </div>
267
  <div class="task-grid">
268
  <div class="task-card easy">
 
271
  <span class="difficulty-badge easy">Easy</span>
272
  </div>
273
  <h3>Dockerfile Syntax Errors</h3>
274
+ <p>The classic stuff &mdash; misspelled filenames, bad base image tags, broken <code>RUN</code> continuations. Things that make <code>docker build</code> fail immediately.</p>
275
  <div class="task-scenarios">5 scenarios</div>
276
  </div>
277
  <div class="task-card medium">
 
280
  <span class="difficulty-badge medium">Medium</span>
281
  </div>
282
  <h3>Dockerfile Runtime Errors</h3>
283
+ <p>It builds fine, then crashes at runtime. Missing WORKDIR, CMD/ENTRYPOINT conflicts, permission issues, missing env vars.</p>
284
  <div class="task-scenarios">5 scenarios</div>
285
  </div>
286
  <div class="task-card easy">
 
289
  <span class="difficulty-badge easy">Easy</span>
290
  </div>
291
  <h3>Workflow Syntax &amp; Structure</h3>
292
+ <p>GitHub Actions YAML that GitHub refuses to even parse. Missing <code>runs-on</code>, wrong trigger format, steps without actions.</p>
293
  <div class="task-scenarios">5 scenarios</div>
294
  </div>
295
  <div class="task-card medium">
 
298
  <span class="difficulty-badge medium">Medium</span>
299
  </div>
300
  <h3>Secrets &amp; Permissions</h3>
301
+ <p>The secret is right there in the repo settings, but the workflow can't see it. Missing <code>env:</code> blocks, wrong <code>${{ }}</code> syntax, token permission gaps.</p>
302
  <div class="task-scenarios">5 scenarios</div>
303
  </div>
304
  <div class="task-card medium">
 
307
  <span class="difficulty-badge medium-hard">Medium-Hard</span>
308
  </div>
309
  <h3>CI + Docker Integration</h3>
310
+ <p>The workflow and Dockerfile depend on each other. Build context mismatches, missing buildx setup, login without secrets.</p>
311
  <div class="task-scenarios">5 scenarios</div>
312
  </div>
313
  <div class="task-card hard">
 
316
  <span class="difficulty-badge hard">Hard</span>
317
  </div>
318
  <h3>Multi-Stage &amp; Matrix Pipelines</h3>
319
+ <p>Multi-stage builds, matrix strategies, cross-job artifacts. Two or three bugs that only make sense when you look at the files together.</p>
320
+ <div class="task-scenarios">5 scenarios</div>
321
+ </div>
322
+ <div class="task-card medium">
323
+ <div class="task-header">
324
+ <span class="task-id">k8s_pod_failures</span>
325
+ <span class="difficulty-badge medium">Medium</span>
326
+ </div>
327
+ <h3>Kubernetes Pod Failures</h3>
328
+ <p>Pods stuck in CrashLoopBackOff or ImagePullBackOff. OOM kills, wrong commands, missing ConfigMaps, misconfigured probes.</p>
329
  <div class="task-scenarios">5 scenarios</div>
330
  </div>
331
+ <div class="task-card hard">
332
+ <div class="task-header">
333
+ <span class="task-id">k8s_networking</span>
334
+ <span class="difficulty-badge hard">Hard</span>
335
+ </div>
336
+ <h3>K8s Service &amp; Ingress Issues</h3>
337
+ <p>Pods are running, but nobody can reach them. Selector mismatches, wrong targetPorts, NetworkPolicies blocking traffic, missing ingress classes.</p>
338
+ <div class="task-scenarios">5 scenarios</div>
339
+ </div>
340
+ <div class="task-card hard">
341
+ <div class="task-header">
342
+ <span class="task-id">pipeline_build_deploy</span>
343
+ <span class="difficulty-badge hard">Hard</span>
344
+ </div>
345
+ <h3>Build &amp; Push Pipeline</h3>
346
+ <p>End-to-end GHA-to-Docker-to-Registry failures. GHCR tokens not wired, image tag mismatches between build and push, missing permissions.</p>
347
+ <div class="task-scenarios">5 scenarios</div>
348
+ </div>
349
+ <div class="task-card expert">
350
+ <div class="task-header">
351
+ <span class="task-id">pipeline_full_stack</span>
352
+ <span class="difficulty-badge expert">Expert</span>
353
+ </div>
354
+ <h3>Full Stack Pipeline</h3>
355
+ <p>The real deal &mdash; 2 to 4 bugs scattered across a GHA workflow, Dockerfile, and K8s manifests at the same time. Requires cross-file reasoning.</p>
356
+ <div class="task-scenarios">5 scenarios &middot; multi-error</div>
357
+ </div>
358
  </div>
359
  </section>
360
 
361
+ <!-- Grading -->
362
  <section class="section">
363
  <div class="section-header">
364
+ <h2>Grading</h2>
365
+ <p>Deterministic, difficulty-aware scoring. Same actions, same score. Harder tasks get more room to breathe.</p>
366
  </div>
367
  <div class="grading-grid">
368
  <div class="grade-card">
369
+ <div class="grade-weight">35%</div>
370
  <h4>Partial Fixes</h4>
371
+ <p>Credit for each issue you fix, even if you don't get them all</p>
372
  </div>
373
  <div class="grade-card">
374
+ <div class="grade-weight">25%</div>
375
  <h4>Complete Bonus</h4>
376
+ <p>Extra credit when every single issue is resolved</p>
377
  </div>
378
  <div class="grade-card">
379
+ <div class="grade-weight">25%</div>
380
  <h4>Efficiency</h4>
381
+ <p>Fewer steps is better &mdash; decay is gentler on hard tasks</p>
382
  </div>
383
  <div class="grade-card">
384
+ <div class="grade-weight">+3%</div>
385
+ <h4>Difficulty Bonus</h4>
386
+ <p>Solving hard/expert tasks perfectly earns extra points</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  </div>
388
+ <div class="grade-card">
389
+ <div class="grade-weight">&minus;3-4%</div>
390
+ <h4>Hint Penalty</h4>
391
+ <p>Per hint used &mdash; cheaper on harder tasks where hints are fair</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  </div>
393
  </div>
394
  </section>
395
 
396
+ <!-- API Endpoints -->
397
  <section class="section">
398
  <div class="section-header">
399
  <h2>API Endpoints</h2>
400
+ <p>Everything you need to run episodes, grade trajectories, and inspect the environment.</p>
401
  </div>
402
  <table class="endpoint-table">
403
  <thead>
404
+ <tr><th>Endpoint</th><th>Method</th><th>Description</th></tr>
 
 
 
 
405
  </thead>
406
  <tbody>
407
+ <tr><td><span class="endpoint-path">/health</span></td><td><span class="method-badge get">GET</span></td><td>Returns <code>{"status": "healthy"}</code></td></tr>
408
+ <tr><td><span class="endpoint-path">/metadata</span></td><td><span class="method-badge get">GET</span></td><td>Environment name, version, tags</td></tr>
409
+ <tr><td><span class="endpoint-path">/tasks</span></td><td><span class="method-badge get">GET</span></td><td>All 10 tasks with difficulty levels</td></tr>
410
+ <tr><td><span class="endpoint-path">/info</span></td><td><span class="method-badge get">GET</span></td><td>Full task list with schemas</td></tr>
411
+ <tr><td><span class="endpoint-path">/reset</span></td><td><span class="method-badge post">POST</span></td><td>Start a new episode (pick a task or get a random one)</td></tr>
412
+ <tr><td><span class="endpoint-path">/step</span></td><td><span class="method-badge post">POST</span></td><td>Take an action, get back observation + reward</td></tr>
413
+ <tr><td><span class="endpoint-path">/state</span></td><td><span class="method-badge get">GET</span></td><td>Current observation without acting</td></tr>
414
+ <tr><td><span class="endpoint-path">/grader</span></td><td><span class="method-badge post">POST</span></td><td>Score a trajectory after the episode</td></tr>
415
+ <tr><td><span class="endpoint-path">/baseline</span></td><td><span class="method-badge post">POST</span></td><td>Run the built-in heuristic baseline</td></tr>
416
+ <tr><td><span class="endpoint-path">/schema</span></td><td><span class="method-badge get">GET</span></td><td>Action and observation JSON schemas</td></tr>
417
+ <tr><td><span class="endpoint-path">/mcp</span></td><td><span class="method-badge post">POST</span></td><td>JSON-RPC 2.0 MCP endpoint</td></tr>
418
+ <tr><td><span class="endpoint-path">/docs</span></td><td><span class="method-badge get">GET</span></td><td>Interactive Swagger docs</td></tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  </tbody>
420
  </table>
421
  </section>
422
 
423
+ <!-- Quick Start -->
424
  <section class="section">
425
  <div class="section-header">
426
+ <h2>Try It</h2>
427
+ <p>Fix a K8s OOMKilled pod in 2 commands.</p>
428
  </div>
429
  <div class="code-block">
430
+ <span class="comment"># 1. Get a broken K8s deployment with OOMKilled pods</span>
431
  <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/reset</span> \
432
  -H <span class="flag">"Content-Type: application/json"</span> \
433
+ -d '{"task_id": "k8s_pod_failures", "scenario_id": "oom_killed"}'
434
 
435
+ <span class="comment"># 2. Bump the memory limit from 64Mi to 256Mi</span>
436
  <span class="cmd">curl</span> -X POST <span class="url">http://localhost:8000/step</span> \
437
  -H <span class="flag">"Content-Type: application/json"</span> \
438
+ -d '{"action": {"action_type": "edit_file", "edits": [{"file_path": "k8s/deployment.yaml", "old_content": "memory: \"64Mi\"", "new_content": "memory: \"256Mi\""}]}}'
439
 
440
+ <span class="comment"># 3. Done - issues_fixed: 1/1, reward: 0.3</span>
 
 
 
441
  </div>
442
  </section>
443
 
 
444
  <footer class="footer">
445
  <p>
446
  Built for the <strong>OpenEnv Hackathon</strong> by
 
453
  </div>
454
 
455
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  const cards = document.querySelectorAll('.task-card, .grade-card, .flow-step');
457
+ const obs = new IntersectionObserver((entries) => {
458
  entries.forEach((entry, i) => {
459
  if (entry.isIntersecting) {
460
  entry.target.style.animation = `fadeInUp 0.5s ease-out ${i * 0.06}s both`;
461
+ obs.unobserve(entry.target);
462
  }
463
  });
464
  }, { threshold: 0.15 });
465
+ cards.forEach(c => obs.observe(c));
466
  </script>
467
 
468
  </body>