Spaces:

jester1177
/

cloudnative-devops-debug-env

Sleeping

App Files Files Community

Krishna1107 committed on Apr 5

Commit

a7caaff

1 Parent(s): 804f70e

fixing old code

Browse files

Files changed (20) hide show

baseline_runner.py +2 -5
inference.py +0 -5
server/environment.py +1 -3
server/graders/__init__.py +6 -14
server/graders/base.py +0 -101
server/main.py +1 -3
server/models.py +10 -18
server/simulators/docker_simulator.py +17 -20
server/simulators/workflow_simulator.py +20 -22
server/tasks/base.py +1 -3
server/tasks/task_1_build_errors.py +1 -1
server/tasks/task_2_docker_runtime.py +1 -1
server/tasks/task_3_workflow_syntax.py +1 -1
server/tasks/task_4_workflow_secrets_permissions.py +1 -1
server/tasks/task_5_ci_docker_integration.py +1 -1
server/tasks/task_6_multi_stage_matrix.py +1 -1
server/tasks/task_registry.py +0 -1
server/utils/yaml_parser.py +1 -3
smoke_test.py +14 -5
tests/test_determinism.py +5 -11

baseline_runner.py CHANGED Viewed

@@ -1,11 +1,8 @@
-"""Baseline runner for the /baseline endpoint.
-Runs episodes using a simple heuristic agent (no LLM required).
-The heuristic agent applies expected_fixes directly to demonstrate
-that the environment and grader work correctly end-to-end.
 """
-from __future__ import annotations
 from typing import List, Optional

+"""Heuristic baseline runner for the /baseline endpoint.
+Applies expected_fixes directly to verify the environment + grader work e2e.
 """
 from typing import List, Optional

inference.py CHANGED Viewed

@@ -10,7 +10,6 @@ Usage:
     python inference.py
 """
-from __future__ import annotations
 import json
 import os
@@ -22,7 +21,6 @@ from typing import Any, Dict, List, Optional
 import requests
 from openai import OpenAI
-# ── Configuration ─────────────────────────────────────────────────
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-70B-Instruct")
@@ -183,7 +181,6 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
     actual_task_id = info.get("task_id", task_id or "unknown")
     actual_scenario_id = info.get("scenario_id", scenario_id or "unknown")
-    # ── [START] structured log ──
     print(f"[START] task_id={actual_task_id} scenario_id={actual_scenario_id}")
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
@@ -221,7 +218,6 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
         issues_fixed = step_info.get("issues_fixed", 0)
         issues_total = step_info.get("issues_total", 0)
-        # ── [STEP] structured log ──
         print(f"[STEP] step={total_steps} action={action['action_type']} reward={reward:.2f} done={str(done).lower()} issues_fixed={issues_fixed} issues_total={issues_total}")
         trajectory.append({
@@ -243,7 +239,6 @@ def run_episode(client: OpenAI, task_id: Optional[str] = None, scenario_id: Opti
     result = grade_resp.get("result", {})
     score = result.get("score", 0.0)
-    # ── [END] structured log ──
     print(f"[END] task_id={actual_task_id} scenario_id={actual_scenario_id} score={score:.3f} steps={total_steps}")
     return result

     python inference.py
 """
 import json
 import os
 import requests
 from openai import OpenAI
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-70B-Instruct")
     actual_task_id = info.get("task_id", task_id or "unknown")
     actual_scenario_id = info.get("scenario_id", scenario_id or "unknown")
     print(f"[START] task_id={actual_task_id} scenario_id={actual_scenario_id}")
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
         issues_fixed = step_info.get("issues_fixed", 0)
         issues_total = step_info.get("issues_total", 0)
         print(f"[STEP] step={total_steps} action={action['action_type']} reward={reward:.2f} done={str(done).lower()} issues_fixed={issues_fixed} issues_total={issues_total}")
         trajectory.append({
     result = grade_resp.get("result", {})
     score = result.get("score", 0.0)
     print(f"[END] task_id={actual_task_id} scenario_id={actual_scenario_id} score={score:.3f} steps={total_steps}")
     return result

server/environment.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Core environment loop for Day 1-2 foundation."""
-from __future__ import annotations
 import copy
 import random

+"""Core environment logic."""
 import copy
 import random

server/graders/__init__.py CHANGED Viewed

@@ -1,21 +1,13 @@
 """Deterministic grader for trajectory scoring.
-Scoring breakdown (matches CONTEXT.md):
-- Partial fixes: 40% proportional to fix ratio
-- Complete solution bonus: 30% if ALL issues fixed
-- Efficiency: 20% max, decays with extra steps
-- Hint penalty: -5% per hint used
-- Failed action penalty: -2% per failed edit (no valid edits)
-Score examples (2-bug scenario):
-  Fix 1/2         → ~0.40
-  Fix 2/2 (slow)  → ~0.85
-  Fix 2/2 (fast)  → ~1.0
-  2 hints used    → -0.10
 """
-from __future__ import annotations
 from typing import Any, Dict, List
 from server.models import GraderResult

 """Deterministic grader for trajectory scoring.
+Scoring weights:
+  partial fixes   40%  (proportional to fix ratio)
+  complete bonus  30%  (all issues fixed)
+  efficiency      30%  (decays with extra steps)
+  hint penalty    -5%  each
+  failed edit     -2%  each
 """
 from typing import Any, Dict, List
 from server.models import GraderResult

server/graders/base.py DELETED Viewed

@@ -1,101 +0,0 @@
-"""Base grader interface with shared scoring utilities.
-The concrete default grader lives in ``server.graders.__init__``.
-This module provides a class-based interface for task-specific overrides.
-"""
-from __future__ import annotations
-from typing import Any, Dict, List
-from server.models import GraderResult
-class BaseGrader:
-    """Base class for task graders.
-    Subclass and override ``grade()`` for task-specific scoring.
-    The default pipeline in ``server.graders.__init__.run_grader``
-    works for all tasks without subclassing.
-    """
-    PARTIAL_FIX_WEIGHT: float = 0.40
-    COMPLETE_BONUS: float = 0.30
-    EFFICIENCY_MAX: float = 0.30
-    EFFICIENCY_DECAY: float = 0.03
-    HINT_PENALTY_EACH: float = 0.05
-    FAILED_ACTION_PENALTY: float = 0.02
-    EDIT_ACTION_TYPES = frozenset({
-        "edit_file", "replace_line", "add_line",
-        "delete_line", "add_block", "delete_block",
-    })
-    def grade(self, task_id: str, trajectory: List[Dict[str, Any]]) -> GraderResult:
-        return self.compute_score(task_id, trajectory)
-    def compute_score(self, task_id: str, trajectory: List[Dict[str, Any]]) -> GraderResult:
-        if not trajectory:
-            return GraderResult(
-                task_id=task_id,
-                score=0.0,
-                breakdown={"partial_fixes": 0.0, "complete_solution": 0.0, "efficiency": 0.0, "hint_penalty": 0.0},
-                feedback="No actions taken",
-                steps_taken=0,
-                hints_used=0,
-            )
-        final_step = trajectory[-1]
-        steps_taken = len(trajectory)
-        hints_used = self._count_hints(trajectory)
-        issues_fixed = int(final_step.get("info", {}).get("issues_fixed", 0))
-        issues_total = max(1, int(final_step.get("info", {}).get("issues_total", 1)))
-        fix_ratio = issues_fixed / issues_total
-        partial_score = self.PARTIAL_FIX_WEIGHT * fix_ratio
-        complete_bonus = self.COMPLETE_BONUS if issues_fixed == issues_total else 0.0
-        efficiency = self._efficiency_score(steps_taken, issues_total, issues_fixed)
-        hint_pen = self.HINT_PENALTY_EACH * hints_used
-        score = max(0.0, min(1.0, partial_score + complete_bonus + efficiency - hint_pen))
-        return GraderResult(
-            task_id=task_id,
-            score=round(score, 3),
-            breakdown={
-                "partial_fixes": round(partial_score, 3),
-                "complete_solution": round(complete_bonus, 3),
-                "efficiency": round(efficiency, 3),
-                "hint_penalty": round(-hint_pen, 3),
-            },
-            feedback=self._feedback_message(score),
-            steps_taken=steps_taken,
-            hints_used=hints_used,
-        )
-    @staticmethod
-    def _count_hints(trajectory: List[Dict[str, Any]]) -> int:
-        return sum(
-            1 for step in trajectory
-            if step.get("action", {}).get("action_type") == "request_hint"
-        )
-    def _efficiency_score(self, steps_taken: int, issues_total: int, issues_fixed: int = 1) -> float:
-        if issues_fixed == 0:
-            return 0.0
-        if steps_taken <= issues_total:
-            return self.EFFICIENCY_MAX
-        return max(0.0, self.EFFICIENCY_MAX - self.EFFICIENCY_DECAY * (steps_taken - issues_total))
-    @staticmethod
-    def _feedback_message(score: float) -> str:
-        if score >= 0.9:
-            return "Excellent! All issues fixed efficiently."
-        if score >= 0.7:
-            return "Good job! Most issues fixed."
-        if score >= 0.5:
-            return "Partial success. Some issues remain."
-        if score >= 0.3:
-            return "Limited progress. Review the error messages carefully."
-        return "Needs improvement. Try analyzing the error phase first."

server/main.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""FastAPI entrypoint implementing required environment endpoints."""
-from __future__ import annotations
 from pathlib import Path
 from typing import Optional

+"""FastAPI server for the CI/CD Debug Environment."""
 from pathlib import Path
 from typing import Optional

server/models.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Typed Pydantic models for API and environment state."""
-from __future__ import annotations
 from enum import Enum
 from typing import Any, Dict, List, Optional, Union
@@ -43,18 +41,18 @@ class ErrorPhase(str, Enum):
 class FileContent(BaseModel):
-    path: str = Field(..., description="File path")
-    content: str = Field(..., description="Current file content")
-    file_type: FileType = Field(..., description="Type of file")
-    line_count: int = Field(..., description="Number of lines")
 class ErrorInfo(BaseModel):
-    phase: ErrorPhase = Field(..., description="Failed phase")
-    error_message: str = Field(..., description="Error output")
-    exit_code: Optional[int] = Field(None, description="Exit code")
-    failed_step: Optional[str] = Field(None, description="Failed step")
-    line_hint: Optional[int] = Field(None, description="Suggested line")
 class Observation(BaseModel):
@@ -89,12 +87,6 @@ class Action(BaseModel):
     reasoning: Optional[str] = None
-class StepResult(BaseModel):
-    observation: Observation
-    reward: float = Field(..., ge=-1.0, le=2.0)
-    done: bool
-    info: Dict[str, Any] = Field(default_factory=dict)
 class TaskInfo(BaseModel):
     id: str

+"""Pydantic models for the API."""
 from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 class FileContent(BaseModel):
+    path: str
+    content: str
+    file_type: FileType
+    line_count: int
 class ErrorInfo(BaseModel):
+    phase: ErrorPhase
+    error_message: str
+    exit_code: Optional[int] = None
+    failed_step: Optional[str] = None
+    line_hint: Optional[int] = None
 class Observation(BaseModel):
     reasoning: Optional[str] = None
 class TaskInfo(BaseModel):
     id: str

server/simulators/docker_simulator.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Docker build/run simulator with deterministic rule-based validation."""
-from __future__ import annotations
 from typing import Dict, List, Optional, Set
@@ -70,7 +68,7 @@ class DockerSimulator:
         if not active_lines:
             return {"build_success": False, "run_success": False, "error": "Dockerfile is empty"}
-        # --- ARG before FROM is allowed, but first non-ARG instruction must be FROM ---
         first_non_arg = None
         for line in active_lines:
             token = line.split()[0].upper()
@@ -86,7 +84,7 @@ class DockerSimulator:
                 "error": "Dockerfile must start with FROM",
             }
-        # --- Instruction validation ---
         for idx, raw in enumerate(active_lines, start=1):
             token = raw.split()[0].upper()
             # Handle --platform= prefix on FROM
@@ -110,7 +108,7 @@ class DockerSimulator:
                     "line": idx,
                 }
-        # --- Invalid base image tags ---
         if "FROM python:3.9-slimm" in content:
             return {
                 "build_success": False,
@@ -118,7 +116,7 @@ class DockerSimulator:
                 "error": "pull access denied for python:3.9-slimm",
             }
-        # --- Typo in requirements filename ---
         if "requirments.txt" in content:
             return {
                 "build_success": False,
@@ -126,7 +124,7 @@ class DockerSimulator:
                 "error": "COPY failed: file not found in build context: requirments.txt",
             }
-        # --- COPY source validation ---
         for raw in active_lines:
             upper = raw.upper()
             if upper.startswith("COPY "):
@@ -149,7 +147,7 @@ class DockerSimulator:
                         "error": f"COPY failed: file not found in build context: {src}",
                     }
-        # --- Platform ARG declarations ---
         if "--platform=$BUILDPLATFORM" in content and "ARG BUILDPLATFORM" not in content:
             return {
                 "build_success": False,
@@ -163,7 +161,7 @@ class DockerSimulator:
                 "error": "failed to parse platform: TARGETPLATFORM not declared",
             }
-        # --- Multi-stage artifact path mismatch (dist vs build) ---
         if "COPY --from=builder /app/dist" in content:
             pkg = context_files.get("package.json")
             if pkg and "react-scripts build" in pkg.content:
@@ -173,7 +171,7 @@ class DockerSimulator:
                     "error": "COPY failed: stat app/dist: file does not exist",
                 }
-        # --- EXPOSE string validation ---
         for raw in active_lines:
             upper = raw.upper()
             if upper.startswith("EXPOSE "):
@@ -188,11 +186,11 @@ class DockerSimulator:
                             "error": f"EXPOSE requires numeric port or port/protocol, got: {cleaned}",
                         }
-        # =====================================================
-        # Runtime checks (build succeeds, run may fail)
-        # =====================================================
-        # --- Missing WORKDIR causing module resolution failures ---
         has_workdir = "WORKDIR" in content
         if ("npm start" in content or 'CMD ["npm", "start"]' in content) and not has_workdir:
             return {
@@ -201,7 +199,7 @@ class DockerSimulator:
                 "run_error": "Error: Cannot find module '/package.json'",
             }
-        # --- ENTRYPOINT + identical CMD conflict ---
         if 'ENTRYPOINT ["python"' in content and 'CMD ["python"' in content:
             return {
                 "build_success": True,
@@ -209,7 +207,7 @@ class DockerSimulator:
                 "run_error": "container exits immediately; ENTRYPOINT and CMD both specify full command",
             }
-        # --- Entrypoint script not executable ---
         if 'ENTRYPOINT ["./start.sh"]' in content and "chmod +x" not in content:
             return {
                 "build_success": True,
@@ -217,8 +215,7 @@ class DockerSimulator:
                 "run_error": "exec ./start.sh: permission denied",
             }
-        # --- Missing required ENV variable (DATABASE_URL) ---
-        # Check if the scenario error mentions DATABASE_URL (via context files or content)
         has_database_url_env = "ENV DATABASE_URL" in content
         needs_database_url = (
             "app.py" in content
@@ -232,7 +229,7 @@ class DockerSimulator:
                 "run_error": "KeyError: 'DATABASE_URL' — Application requires DATABASE_URL environment variable",
             }
-        # --- Non-root user binding to privileged port ---
         has_user_switch = False
         expose_port = None
         for raw in active_lines:

+"""Docker build/run simulator — deterministic, rule-based."""
 from typing import Dict, List, Optional, Set
         if not active_lines:
             return {"build_success": False, "run_success": False, "error": "Dockerfile is empty"}
+        # ARG before FROM is fine, but the first real instruction must be FROM
         first_non_arg = None
         for line in active_lines:
             token = line.split()[0].upper()
                 "error": "Dockerfile must start with FROM",
             }
+        # validate instructions
         for idx, raw in enumerate(active_lines, start=1):
             token = raw.split()[0].upper()
             # Handle --platform= prefix on FROM
                     "line": idx,
                 }
+        # known-bad base image tags
         if "FROM python:3.9-slimm" in content:
             return {
                 "build_success": False,
                 "error": "pull access denied for python:3.9-slimm",
             }
+        # typo in requirements filename
         if "requirments.txt" in content:
             return {
                 "build_success": False,
                 "error": "COPY failed: file not found in build context: requirments.txt",
             }
+        # COPY source must exist in build context
         for raw in active_lines:
             upper = raw.upper()
             if upper.startswith("COPY "):
                         "error": f"COPY failed: file not found in build context: {src}",
                     }
+        # platform ARGs need to be declared
         if "--platform=$BUILDPLATFORM" in content and "ARG BUILDPLATFORM" not in content:
             return {
                 "build_success": False,
                 "error": "failed to parse platform: TARGETPLATFORM not declared",
             }
+        # multi-stage: output dir mismatch (dist vs build)
         if "COPY --from=builder /app/dist" in content:
             pkg = context_files.get("package.json")
             if pkg and "react-scripts build" in pkg.content:
                     "error": "COPY failed: stat app/dist: file does not exist",
                 }
+        # EXPOSE must have a numeric port
         for raw in active_lines:
             upper = raw.upper()
             if upper.startswith("EXPOSE "):
                             "error": f"EXPOSE requires numeric port or port/protocol, got: {cleaned}",
                         }
+        # ============================
+        # runtime checks (build OK, run might fail)
+        # ============================
+        # no WORKDIR → module resolution fails
         has_workdir = "WORKDIR" in content
         if ("npm start" in content or 'CMD ["npm", "start"]' in content) and not has_workdir:
             return {
                 "run_error": "Error: Cannot find module '/package.json'",
             }
+        # ENTRYPOINT + CMD both specify python → conflict
         if 'ENTRYPOINT ["python"' in content and 'CMD ["python"' in content:
             return {
                 "build_success": True,
                 "run_error": "container exits immediately; ENTRYPOINT and CMD both specify full command",
             }
+        # entrypoint script needs chmod +x
         if 'ENTRYPOINT ["./start.sh"]' in content and "chmod +x" not in content:
             return {
                 "build_success": True,
                 "run_error": "exec ./start.sh: permission denied",
             }
+        # DATABASE_URL env var missing
         has_database_url_env = "ENV DATABASE_URL" in content
         needs_database_url = (
             "app.py" in content
                 "run_error": "KeyError: 'DATABASE_URL' — Application requires DATABASE_URL environment variable",
             }
+        # non-root user can't bind privileged ports
         has_user_switch = False
         expose_port = None
         for raw in active_lines:

server/simulators/workflow_simulator.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Workflow simulator with YAML parse and deterministic CI rule checks."""
-from __future__ import annotations
 import re
 from typing import Any, Dict, List, Optional
@@ -17,7 +15,7 @@ class WorkflowSimulator:
         content = workflow.content
-        # --- Single-brace expression check (${ } instead of ${{ }}) ---
         # Match ${ ... } that is NOT ${{ ... }}
         single_brace = re.findall(r'\$\{(?!\{)\s*[^}]+\}', content)
         if single_brace:
@@ -30,7 +28,7 @@ class WorkflowSimulator:
                 ),
             }
-        # --- YAML parse ---
         try:
             parsed = yaml.safe_load(content)
         except yaml.YAMLError as exc:
@@ -47,7 +45,7 @@ class WorkflowSimulator:
                 "error": "Workflow root must be a mapping",
             }
-        # --- Missing 'on' trigger ---
         if "on" not in parsed and True not in parsed:
             # yaml.safe_load converts `on:` to True key in some contexts
             return {
@@ -56,7 +54,7 @@ class WorkflowSimulator:
                 "error": "Workflow must define an 'on' trigger event",
             }
-        # --- Validate 'on' trigger structure ---
         on_value = parsed.get("on") or parsed.get(True)
         if isinstance(on_value, dict):
             for event_key, event_config in on_value.items():
@@ -73,7 +71,7 @@ class WorkflowSimulator:
                             ),
                         }
-        # --- Jobs validation ---
         jobs = parsed.get("jobs")
         if not isinstance(jobs, dict) or not jobs:
             return {
@@ -98,7 +96,7 @@ class WorkflowSimulator:
             if not isinstance(job, dict):
                 continue
-            # --- Missing runs-on ---
             if "runs-on" not in job:
                 return {
                     "parse_success": False,
@@ -106,7 +104,7 @@ class WorkflowSimulator:
                     "error": f"Job '{job_name}' is missing required field 'runs-on'",
                 }
-            # --- Validate 'needs' references ---
             needs = job.get("needs")
             if needs:
                 needed = [needs] if isinstance(needs, str) else (needs if isinstance(needs, list) else [])
@@ -126,7 +124,7 @@ class WorkflowSimulator:
                     "error": f"Job '{job_name}' steps must be a list",
                 }
-            # --- Validate each step has 'uses' or 'run' ---
             for step in steps:
                 if not isinstance(step, dict):
                     continue
@@ -140,7 +138,7 @@ class WorkflowSimulator:
                         "error": f"Every step must define a 'uses' or 'run' key. Step '{step_name}' has neither.",
                     }
-            # --- Checkout before build order ---
             checkout_index = -1
             build_index = -1
             for idx, step in enumerate(steps):
@@ -162,7 +160,7 @@ class WorkflowSimulator:
                     "exec_error": "Checkout must happen before Docker build steps",
                 }
-        # --- Cross-job artifact dependency check ---
         # If a job uses download-artifact but doesn't declare needs on the upload job
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
@@ -186,7 +184,7 @@ class WorkflowSimulator:
                         ),
                     }
-        # --- Docker login with secrets not wired via env ---
         if has_docker_login:
             # Check if the login step has env block with secrets
             login_has_env_secrets = has_username_secret and has_password_secret
@@ -199,7 +197,7 @@ class WorkflowSimulator:
                         "exec_error": "Docker login secrets not wired — add env block with secrets.DOCKER_USERNAME and secrets.DOCKER_PASSWORD",
                     }
-        # --- Push without login ---
         if has_docker_push and not has_docker_login:
             # Check if using docker/login-action instead
             has_login_action = "docker/login-action" in content
@@ -210,7 +208,7 @@ class WorkflowSimulator:
                     "exec_error": "Docker push without login — add a docker login step before pushing",
                 }
-        # --- GHCR login with wrong credentials ---
         if "docker login ghcr.io" in content:
             if has_password_secret and not has_github_token_secret:
                 return {
@@ -219,7 +217,7 @@ class WorkflowSimulator:
                     "exec_error": "GHCR requires GITHUB_TOKEN for authentication, not DOCKER_PASSWORD",
                 }
-        # --- Missing permissions for GHCR push ---
         if "ghcr.io" in content and "docker push" in content:
             # Check if permissions block has packages: write
             if "packages: write" not in content and "packages:write" not in content:
@@ -229,7 +227,7 @@ class WorkflowSimulator:
                     "exec_error": "GITHUB_TOKEN does not have packages:write permission — add permissions block",
                 }
-        # --- Multi-platform without buildx ---
         if has_platforms and not has_buildx_setup:
             return {
                 "parse_success": True,
@@ -237,7 +235,7 @@ class WorkflowSimulator:
                 "exec_error": "Multi-platform build requires docker/setup-buildx-action",
             }
-        # --- Cache export without buildx driver ---
         if "cache-to:" in content and "cache-from:" in content:
             # Check for mode=max
             if "cache-to: type=gha" in content and "mode=max" not in content:
@@ -247,7 +245,7 @@ class WorkflowSimulator:
                     "exec_error": "GHA cache export needs mode=max for proper cache support",
                 }
-        # --- Build context / Dockerfile path mismatch ---
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue
@@ -268,7 +266,7 @@ class WorkflowSimulator:
                             "exec_error": f"Dockerfile path '{file_path}' does not match build context '{context}'",
                         }
-        # --- Secret referenced in run but not mapped via env block ---
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue
@@ -296,7 +294,7 @@ class WorkflowSimulator:
                                 "exec_error": f"{var} is empty — secret not available in shell environment. Map it via env block.",
                             }
-        # --- Matrix: Node version incompatibility check ---
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue

+"""Workflow simulator — YAML parse + CI rule checks."""
 import re
 from typing import Any, Dict, List, Optional
         content = workflow.content
+        # single-brace expressions: ${ } should be ${{ }}
         # Match ${ ... } that is NOT ${{ ... }}
         single_brace = re.findall(r'\$\{(?!\{)\s*[^}]+\}', content)
         if single_brace:
                 ),
             }
+        # parse yaml
         try:
             parsed = yaml.safe_load(content)
         except yaml.YAMLError as exc:
                 "error": "Workflow root must be a mapping",
             }
+        # needs an 'on' trigger
         if "on" not in parsed and True not in parsed:
             # yaml.safe_load converts `on:` to True key in some contexts
             return {
                 "error": "Workflow must define an 'on' trigger event",
             }
+        # validate trigger structure
         on_value = parsed.get("on") or parsed.get(True)
         if isinstance(on_value, dict):
             for event_key, event_config in on_value.items():
                             ),
                         }
+        # jobs block
         jobs = parsed.get("jobs")
         if not isinstance(jobs, dict) or not jobs:
             return {
             if not isinstance(job, dict):
                 continue
+            # runs-on is required
             if "runs-on" not in job:
                 return {
                     "parse_success": False,
                     "error": f"Job '{job_name}' is missing required field 'runs-on'",
                 }
+            # check 'needs' refs point to real jobs
             needs = job.get("needs")
             if needs:
                 needed = [needs] if isinstance(needs, str) else (needs if isinstance(needs, list) else [])
                     "error": f"Job '{job_name}' steps must be a list",
                 }
+            # every step needs 'uses' or 'run'
             for step in steps:
                 if not isinstance(step, dict):
                     continue
                         "error": f"Every step must define a 'uses' or 'run' key. Step '{step_name}' has neither.",
                     }
+            # checkout must come before docker build
             checkout_index = -1
             build_index = -1
             for idx, step in enumerate(steps):
                     "exec_error": "Checkout must happen before Docker build steps",
                 }
+        # cross-job artifact dependency: download needs 'needs'
         # If a job uses download-artifact but doesn't declare needs on the upload job
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                         ),
                     }
+        # docker login needs secrets wired via env
         if has_docker_login:
             # Check if the login step has env block with secrets
             login_has_env_secrets = has_username_secret and has_password_secret
                         "exec_error": "Docker login secrets not wired — add env block with secrets.DOCKER_USERNAME and secrets.DOCKER_PASSWORD",
                     }
+        # push without login
         if has_docker_push and not has_docker_login:
             # Check if using docker/login-action instead
             has_login_action = "docker/login-action" in content
                     "exec_error": "Docker push without login — add a docker login step before pushing",
                 }
+        # ghcr.io needs GITHUB_TOKEN not DOCKER_PASSWORD
         if "docker login ghcr.io" in content:
             if has_password_secret and not has_github_token_secret:
                 return {
                     "exec_error": "GHCR requires GITHUB_TOKEN for authentication, not DOCKER_PASSWORD",
                 }
+        # ghcr push needs packages:write permission
         if "ghcr.io" in content and "docker push" in content:
             # Check if permissions block has packages: write
             if "packages: write" not in content and "packages:write" not in content:
                     "exec_error": "GITHUB_TOKEN does not have packages:write permission — add permissions block",
                 }
+        # multi-platform needs buildx
         if has_platforms and not has_buildx_setup:
             return {
                 "parse_success": True,
                 "exec_error": "Multi-platform build requires docker/setup-buildx-action",
             }
+        # GHA cache export needs mode=max
         if "cache-to:" in content and "cache-from:" in content:
             # Check for mode=max
             if "cache-to: type=gha" in content and "mode=max" not in content:
                     "exec_error": "GHA cache export needs mode=max for proper cache support",
                 }
+        # context vs dockerfile path mismatch
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue
                             "exec_error": f"Dockerfile path '{file_path}' does not match build context '{context}'",
                         }
+        # shell env var from secret but not mapped in env block
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue
                                 "exec_error": f"{var} is empty — secret not available in shell environment. Map it via env block.",
                             }
+        # node version vs package.json engines
         for job_name, job in jobs.items():
             if not isinstance(job, dict):
                 continue

server/tasks/base.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Base task class for scenario-based tasks."""
-from __future__ import annotations
 import random
 from typing import Dict, Optional

+"""Base task class."""
 import random
 from typing import Dict, Optional

server/tasks/task_1_build_errors.py CHANGED Viewed

@@ -5,7 +5,7 @@ typos in filenames, invalid base image tags, bad RUN syntax,
 quoted EXPOSE values, missing FROM instruction.
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 quoted EXPOSE values, missing FROM instruction.
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_2_docker_runtime.py CHANGED Viewed

@@ -5,7 +5,7 @@ runtime: missing WORKDIR, CMD/ENTRYPOINT conflicts, permission issues,
 and missing environment variables.
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 and missing environment variables.
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_3_workflow_syntax.py CHANGED Viewed

@@ -5,7 +5,7 @@ step ordering, missing runs-on, invalid triggers, duplicate job IDs,
 and missing 'on' trigger.
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 and missing 'on' trigger.
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_4_workflow_secrets_permissions.py CHANGED Viewed

@@ -9,7 +9,7 @@ in GitHub Actions workflows:
 - Missing write permission for packages
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 - Missing write permission for packages
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_5_ci_docker_integration.py CHANGED Viewed

@@ -8,7 +8,7 @@ Agent debugs combined workflow + Docker build integration failures:
 - Missing Docker login before push
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 - Missing Docker login before push
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_6_multi_stage_matrix.py CHANGED Viewed

@@ -8,7 +8,7 @@ Agent debugs complex multi-stage Docker builds and matrix CI/CD pipelines:
 - Matrix strategy with version-specific failures
 """
-from __future__ import annotations
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

 - Matrix strategy with version-specific failures
 """
 from server.models import TaskDifficulty
 from server.tasks.base import BaseTask

server/tasks/task_registry.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """Task registry for the environment."""
-from __future__ import annotations
 from typing import Dict, Type


 """Task registry for the environment."""
 from typing import Dict, Type

server/utils/yaml_parser.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Safe YAML parsing utilities for workflow validation."""
-from __future__ import annotations
 from typing import Any, Optional, Tuple


+"""YAML parsing helpers."""
 from typing import Any, Optional, Tuple

smoke_test.py CHANGED Viewed

@@ -9,7 +9,6 @@ Modes:
   - live: uses requests against a running server.
 """
-from __future__ import annotations
 import argparse
 import json
@@ -42,7 +41,10 @@ class InProcessClient(EndpointClient):
     def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
         response = self._client.get(path)
-        data = response.json() if response.content else {}
         return response.status_code, data
     def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
@@ -60,7 +62,10 @@ class LiveClient(EndpointClient):
     def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
         response = self._requests.get(f"{self._base_url}{path}", timeout=20)
-        data = response.json() if response.content else {}
         return response.status_code, data
     def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
@@ -76,8 +81,12 @@ def assert_true(name: str, cond: bool, details: str = "") -> TestResult:
 def run_smoke(client: EndpointClient) -> int:
     results = []
-    status, data = client.get("/")
-    results.append(assert_true("GET / health", status == 200 and data.get("status") == "healthy", str(data)))
     status, info = client.get("/info")
     results.append(assert_true("GET /info", status == 200 and isinstance(info.get("tasks"), list), str(info)))

   - live: uses requests against a running server.
 """
 import argparse
 import json
     def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
         response = self._client.get(path)
+        try:
+            data = response.json()
+        except Exception:
+            data = {}
         return response.status_code, data
     def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
     def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
         response = self._requests.get(f"{self._base_url}{path}", timeout=20)
+        try:
+            data = response.json()
+        except Exception:
+            data = {}
         return response.status_code, data
     def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
 def run_smoke(client: EndpointClient) -> int:
     results = []
+    # root serves the landing page now (HTML), just check it's 200
+    status, _ = client.get("/")
+    results.append(assert_true("GET / landing page", status == 200))
+    status, health = client.get("/health")
+    results.append(assert_true("GET /health", status == 200 and health.get("status") == "healthy", str(health)))
     status, info = client.get("/info")
     results.append(assert_true("GET /info", status == 200 and isinstance(info.get("tasks"), list), str(info)))

tests/test_determinism.py CHANGED Viewed

@@ -1,10 +1,4 @@
-"""Determinism and score range tests for grader and environment.
-Day 7 deliverables:
-- Same trajectory → same score (determinism)
-- Score ranges match CONTEXT.md expectations
-- Difficulty progression verified
-"""
 from server.environment import CICDDebugEnvironment
 from server.graders import run_grader
@@ -12,7 +6,7 @@ from server.models import Action, ActionType, FileEdit
 from server.tasks.task_registry import TASK_REGISTRY
-# ── Determinism Tests ──────────────────────────────────────────────
 def test_reset_deterministic_with_seed():
@@ -89,7 +83,7 @@ def test_full_episode_determinism():
     assert len(set(scores)) == 1, f"Non-deterministic episode scores: {scores}"
-# ── Score Range Tests ──────────────────────────────────────────────
 def test_empty_trajectory_scores_zero():
@@ -174,7 +168,7 @@ def test_score_always_in_0_1_range():
         assert 0.0 <= r.score <= 1.0, f"Score {r.score} out of [0, 1] range"
-# ── Difficulty Progression Tests ───────────────────────────────────
 def test_difficulty_progression():
@@ -241,7 +235,7 @@ def test_all_scenarios_have_required_fields():
             assert len(scenario["expected_fixes"]) >= 1, f"{task_id}/{scenario['id']}: no expected_fixes"
-# ── End-to-End Score Verification ──────────────────────────────────
 def test_end_to_end_grading_all_tasks():

+"""Determinism and score-range tests for the grader and environment."""
 from server.environment import CICDDebugEnvironment
 from server.graders import run_grader
 from server.tasks.task_registry import TASK_REGISTRY
+# -- determinism --
 def test_reset_deterministic_with_seed():
     assert len(set(scores)) == 1, f"Non-deterministic episode scores: {scores}"
+# -- score ranges --
 def test_empty_trajectory_scores_zero():
         assert 0.0 <= r.score <= 1.0, f"Score {r.score} out of [0, 1] range"
+# -- difficulty progression --
 def test_difficulty_progression():
             assert len(scenario["expected_fixes"]) >= 1, f"{task_id}/{scenario['id']}: no expected_fixes"
+# -- e2e grading --
 def test_end_to_end_grading_all_tasks():