"""
Procedural scenario generator for the CI/CD Doctor environment.

Every public ``generate_*_scenario`` function takes an integer seed and
returns a dict of the form::

    {
        "filesystem": {<relative path>: <file content>, ...},
        "answer_key": {"fixes": {<relative path>: <expected fix text>, ...}},
    }

All randomness flows through a ``random.Random(seed)`` instance, so the same
seed always yields the same scenario.
"""

import random

from .packages import get_packages

PYTHON_VERSIONS = ["3.9", "3.10"]  # always wrong; correct is 3.11
REQUIRED_ENV_VARS = ["DATABASE_URL", "API_KEY", "SECRET_KEY"]
WRONG_PORTS = [3000, 5000, 9000]

# NOTE(review): two of these commands start with the expected Makefile fix
# ("python -m pytest tests/"). If the grader compares answer_key values by
# substring, confirm it also rejects the trailing flag.
WRONG_TEST_COMMANDS = [
    "python -m pytest tests/ --collect-only",  # collects but never runs
    "python -m unittest discover tests/",  # wrong runner
    "python -m pytest tests/ --dry-run",  # dry-run, no output
]

# File bodies that are identical across several scenario types.
_APP_PY = "import flask\n# app code here\n"
_APP_PY_WITH_NUMPY = "import flask\nimport numpy\n# app code here\n"
_CI_YML_WRONG_ORDER = "stages: test, build, install\n"
_DEPLOY_CONFIG_DISABLED = (
    "target_env: production\n"
    "deploy_enabled: false\n"  # BUG: must be true
    "replicas: 2\n"
    "health_check_path: /health\n"
    "timeout: 30\n"
)


def _requirements(packages: list) -> str:
    """Render a requirements.txt body: one entry per line, trailing newline."""
    return "\n".join(packages) + "\n"


def _drop_one(rng: random.Random, items: list) -> tuple:
    """Pick one item with *rng*; return ``(picked, items without picked)``.

    Consumes exactly one ``rng.choice`` draw. Every entry equal to the picked
    one is removed from the returned list.
    """
    picked = rng.choice(items)
    return picked, [item for item in items if item != picked]


def _env_ci_without(missing_var: str) -> str:
    """Render .env.ci with every required variable except *missing_var*.

    Variables are emitted in sorted order as ``NAME=placeholder`` lines.
    """
    return "".join(
        f"{name}=placeholder\n"
        for name in sorted(REQUIRED_ENV_VARS)
        if name != missing_var
    )


def _dockerfile(base_image: str) -> str:
    """Render the Dockerfile used by the scenarios on top of *base_image*."""
    return (
        f"FROM {base_image}\n"
        "WORKDIR /app\n"
        "COPY requirements.txt .\n"
        "RUN pip install -r requirements.txt\n"
        "COPY . .\n"
        'CMD ["python", "app.py"]\n'
    )


def _makefile(test_cmd: str) -> str:
    """Render a Makefile whose ``test`` target runs *test_cmd*."""
    return ".PHONY: test\n" "test:\n" f"\t{test_cmd}\n"


def _service_yaml(port: int) -> str:
    """Render service.yaml exposing *port*."""
    return (
        "apiVersion: v1\n"
        "kind: Service\n"
        "metadata:\n"
        " name: app\n"
        "spec:\n"
        f" port: {port}\n"
    )


def _pipeline(*stages: str) -> str:
    """Render pipeline.yaml listing *stages* in execution order."""
    return "stages:\n" + "".join(f" - {stage}\n" for stage in stages)


def _scenario(filesystem: dict, fixes: dict) -> dict:
    """Wrap a filesystem and its expected fixes in the scenario envelope."""
    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


def generate_easy_scenario(seed: int) -> dict:
    """
    Returns a filesystem dict + answer_key.
    The filesystem has requirements.txt missing one required package.

    The dropped package is chosen from ``get_packages("easy")`` with a
    ``random.Random(seed)`` instance and recorded as the requirements.txt fix.
    """
    rng = random.Random(seed)
    all_packages = get_packages("easy")
    missing, present = _drop_one(rng, all_packages)

    return _scenario(
        {
            "requirements.txt": _requirements(present),
            "pipeline.yaml": _pipeline("install"),
            "logs/install.log": "",
            "app.py": _APP_PY_WITH_NUMPY,
        },
        {
            "requirements.txt": missing,
        },
    )


def _medium_type_a(rng: random.Random, all_packages: list) -> dict:
    """
    Type A: wrong Python version (Dockerfile) + missing env var (.env.ci).
    Pipeline: install → env_check → docker_build
    Both files must be fixed. install always passes.
    """
    # Draw order (version, then env var) is part of the seed contract.
    wrong_version = rng.choice(PYTHON_VERSIONS)
    missing_var = rng.choice(REQUIRED_ENV_VARS)

    return _scenario(
        {
            "requirements.txt": _requirements(all_packages),
            "Dockerfile": _dockerfile(f"python:{wrong_version}-slim"),
            ".env.ci": _env_ci_without(missing_var),
            "pipeline.yaml": _pipeline("install", "env_check", "docker_build"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "Dockerfile": "python:3.11",
            ".env.ci": missing_var,
        },
    )


def _medium_type_b(rng: random.Random, all_packages: list) -> dict:
    """
    Type B: missing package (requirements.txt) + deployment flag off (deploy_config.yml).
    Pipeline: install → config_validate → smoke_test
    install fails first; after fixing, config_validate fails.
    """
    missing_pkg, present_pkgs = _drop_one(rng, all_packages)

    return _scenario(
        {
            "requirements.txt": _requirements(present_pkgs),
            "deploy_config.yml": _DEPLOY_CONFIG_DISABLED,
            "pipeline.yaml": _pipeline("install", "config_validate", "smoke_test"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "requirements.txt": missing_pkg,
            "deploy_config.yml": "deploy_enabled: true",
        },
    )


def _medium_type_c(rng: random.Random, all_packages: list) -> dict:
    """
    Type C: wrong test command (Makefile) + missing env var (.env.ci).
    Pipeline: install → env_check → test
    env_check fails first; after fixing, test fails due to bad Makefile.
    """
    # Draw order (command, then env var) is part of the seed contract.
    wrong_cmd = rng.choice(WRONG_TEST_COMMANDS)
    missing_var = rng.choice(REQUIRED_ENV_VARS)

    return _scenario(
        {
            "requirements.txt": _requirements(all_packages),
            ".env.ci": _env_ci_without(missing_var),
            "Makefile": _makefile(wrong_cmd),
            "pipeline.yaml": _pipeline("install", "env_check", "test"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            ".env.ci": missing_var,
            "Makefile": "python -m pytest tests/",
        },
    )


def _medium_type_d(rng: random.Random, all_packages: list) -> dict:
    """
    Type D: wrong port (service.yaml) + wrong Python version (Dockerfile).
    Pipeline: install → port_check → docker_build
    port_check fails first; after fixing, docker_build fails. install always passes.
    """
    # Draw order (version, then port) is part of the seed contract.
    wrong_version = rng.choice(PYTHON_VERSIONS)
    wrong_port = rng.choice(WRONG_PORTS)

    return _scenario(
        {
            "requirements.txt": _requirements(all_packages),
            "Dockerfile": _dockerfile(f"python:{wrong_version}-slim"),
            "service.yaml": _service_yaml(wrong_port),
            "pipeline.yaml": _pipeline("install", "port_check", "docker_build"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "service.yaml": "port: 8080",
            "Dockerfile": "python:3.11",
        },
    )


def generate_medium_scenario(seed: int) -> dict:
    """
    Randomly selects one of four structurally distinct medium scenario types,
    then generates the specifics (which var, which version, etc.) from the same seed.
    Same seed → same scenario every time.
    """
    rng = random.Random(seed)
    all_packages = get_packages("medium")

    builders = {
        "A": _medium_type_a,
        "B": _medium_type_b,
        "C": _medium_type_c,
        "D": _medium_type_d,
    }
    # Choosing among the four letters first keeps the seed → type mapping
    # stable; the builder then continues drawing from the same rng.
    scenario_type = rng.choice(list(builders))
    return builders[scenario_type](rng, all_packages)


def _hard_type_a(rng: random.Random, all_packages: list) -> dict:
    """
    Type A: ci.yml ordering → Dockerfile alpine → numpy version pin.
    Pipeline: ci_validate → docker_build(strict) → install(hard).
    """
    _ = rng  # reserved for future per-seed variation

    # NOTE(review): the bad pin is only injected when the package list has a
    # bare "numpy" entry; the answer key lists the numpy fix regardless.
    # Confirm get_packages("hard") always includes "numpy".
    requirements_lines = [
        "numpy==1.21" if pkg == "numpy" else pkg for pkg in all_packages
    ]

    return _scenario(
        {
            "requirements.txt": _requirements(requirements_lines),
            "Dockerfile": _dockerfile("python:3.11-alpine"),
            "ci.yml": _CI_YML_WRONG_ORDER,
            "pipeline.yaml": _pipeline("ci_validate", "docker_build", "install"),
            "app.py": _APP_PY_WITH_NUMPY,
            "logs/install.log": "",
        },
        {
            "ci.yml": "install, build, test",
            "Dockerfile": "python:3.11-slim",
            "requirements.txt": "numpy==1.26",
        },
    )


def _hard_type_b(rng: random.Random, all_packages: list) -> dict:
    """
    Type B: ci.yml ordering → missing env var → wrong test command.
    Pipeline: ci_validate → env_check → test.
    requirements.txt is clean; no Dockerfile needed.
    """
    # Draw order (command, then env var) is part of the seed contract.
    wrong_cmd = rng.choice(WRONG_TEST_COMMANDS)
    missing_var = rng.choice(REQUIRED_ENV_VARS)

    return _scenario(
        {
            "requirements.txt": _requirements(all_packages),
            "ci.yml": _CI_YML_WRONG_ORDER,
            ".env.ci": _env_ci_without(missing_var),
            "Makefile": _makefile(wrong_cmd),
            "pipeline.yaml": _pipeline("ci_validate", "env_check", "test"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "ci.yml": "install, build, test",
            ".env.ci": missing_var,
            "Makefile": "python -m pytest tests/",
        },
    )


def _hard_type_c(rng: random.Random, all_packages: list) -> dict:
    """
    Type C: Dockerfile alpine → deploy disabled → wrong service port.
    Pipeline: docker_build(strict) → config_validate → port_check.
    """
    wrong_port = rng.choice(WRONG_PORTS)

    return _scenario(
        {
            "requirements.txt": _requirements(all_packages),
            "Dockerfile": _dockerfile("python:3.11-alpine"),
            "deploy_config.yml": _DEPLOY_CONFIG_DISABLED,
            "service.yaml": _service_yaml(wrong_port),
            "pipeline.yaml": _pipeline("docker_build", "config_validate", "port_check"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "Dockerfile": "python:3.11-slim",
            "deploy_config.yml": "deploy_enabled: true",
            "service.yaml": "port: 8080",
        },
    )


def _hard_type_d(rng: random.Random, all_packages: list) -> dict:
    """
    Type D: missing package → missing env var → Dockerfile alpine.
    Pipeline: install(hard) → env_check → docker_build(strict).
    """
    # Draw order (package, then env var) is part of the seed contract.
    missing_pkg, present_pkgs = _drop_one(rng, all_packages)
    missing_var = rng.choice(REQUIRED_ENV_VARS)

    return _scenario(
        {
            "requirements.txt": _requirements(present_pkgs),
            ".env.ci": _env_ci_without(missing_var),
            "Dockerfile": _dockerfile("python:3.11-alpine"),
            "pipeline.yaml": _pipeline("install", "env_check", "docker_build"),
            "app.py": _APP_PY,
            "logs/install.log": "",
        },
        {
            "requirements.txt": missing_pkg,
            ".env.ci": missing_var,
            "Dockerfile": "python:3.11-slim",
        },
    )


def generate_hard_scenario(seed: int) -> dict:
    """
    Randomly selects one of four structurally distinct hard scenario types,
    then generates the specifics from the same seed.

    Each variant is a three-fix cascading failure — each pipeline run stops
    at the first failing stage, so bugs surface one at a time as the agent
    fixes them. Same seed → same scenario every time.
    """
    rng = random.Random(seed)
    all_packages = get_packages("hard")

    builders = {
        "A": _hard_type_a,
        "B": _hard_type_b,
        "C": _hard_type_c,
        "D": _hard_type_d,
    }
    # Choosing among the four letters first keeps the seed → type mapping
    # stable; the builder then continues drawing from the same rng.
    scenario_type = rng.choice(list(builders))
    return builders[scenario_type](rng, all_packages)