Spaces:

samrat-rm
/

CI_CD_Doctor

Sleeping

App Files Files Community

CI_CD_Doctor / core /generator.py

samrat-rm

Upload folder using huggingface_hub

d7c4dd5 verified 4 days ago

raw

history blame contribute delete

14.8 kB

	"""
	Procedural scenario generator for the CI/CD Doctor environment.
	"""

	import random

	from .packages import get_packages

	# Python versions injected into the Dockerfile as the wrong base image;
	# the answer keys in this module expect python:3.11.
	PYTHON_VERSIONS = [
	    "3.9",
	    "3.10",
	]

	# Variable names written to .env.ci; scenarios leave exactly one of them out.
	REQUIRED_ENV_VARS = [
	    "DATABASE_URL",
	    "API_KEY",
	    "SECRET_KEY",
	]

	# Ports injected into service.yaml as the wrong value (answer keys expect 8080).
	WRONG_PORTS = [
	    3000,
	    5000,
	    9000,
	]

	# Commands injected into the Makefile in place of "python -m pytest tests/".
	WRONG_TEST_COMMANDS = [
	    # Collects the tests but never runs them.
	    "python -m pytest tests/ --collect-only",
	    # Uses the unittest runner instead of pytest.
	    "python -m unittest discover tests/",
	    # Dry-run style flag; no test output.
	    "python -m pytest tests/ --dry-run",
	]


	def generate_easy_scenario(seed: int) -> dict:
	    """
	    Build the easy scenario: a requirements.txt with one package removed.

	    Args:
	        seed: Seed for a private ``random.Random``; for a given package
	            list the same seed always removes the same package.

	    Returns:
	        A dict with two keys: ``"filesystem"`` (path -> file content) and
	        ``"answer_key"``, whose ``"fixes"`` entry maps ``requirements.txt``
	        to the package that was removed.
	    """
	    picker = random.Random(seed)
	    catalogue = get_packages("easy")
	    dropped = picker.choice(catalogue)

	    # Every entry equal to the dropped package is left out of the file.
	    kept_lines = "\n".join(pkg for pkg in catalogue if pkg != dropped)

	    filesystem = {
	        "requirements.txt": kept_lines + "\n",
	        "pipeline.yaml": "stages:\n - install\n",
	        "logs/install.log": "",
	        "app.py": "import flask\nimport numpy\n# app code here\n",
	    }
	    return {
	        "filesystem": filesystem,
	        "answer_key": {"fixes": {"requirements.txt": dropped}},
	    }

	def _medium_type_a(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type A: wrong Python version (Dockerfile) + missing env var (.env.ci).

	    pipeline.yaml lists the stages install -> env_check -> docker_build.
	    requirements.txt contains every package, so nothing is missing there.

	    Args:
	        rng: Seeded generator; consumed by two ``choice`` calls (Python
	            version first, then the env var to omit).
	        all_packages: Package lines written to requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: Dockerfile -> "python:3.11", .env.ci -> omitted var).
	    """
	    bad_python = rng.choice(PYTHON_VERSIONS)
	    dropped_var = rng.choice(REQUIRED_ENV_VARS)

	    # Remaining variables, alphabetically, each with a dummy value.
	    kept_names = sorted({name for name in REQUIRED_ENV_VARS if name != dropped_var})
	    env_ci_text = "".join(f"{name}=placeholder\n" for name in kept_names)

	    dockerfile_lines = [
	        f"FROM python:{bad_python}-slim",
	        "WORKDIR /app",
	        "COPY requirements.txt .",
	        "RUN pip install -r requirements.txt",
	        "COPY . .",
	        'CMD ["python", "app.py"]',
	    ]

	    filesystem = {
	        "requirements.txt": "\n".join(all_packages) + "\n",
	        "Dockerfile": "\n".join(dockerfile_lines) + "\n",
	        ".env.ci": env_ci_text,
	        "pipeline.yaml": "stages:\n - install\n - env_check\n - docker_build\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        "Dockerfile": "python:3.11",
	        ".env.ci": dropped_var,
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def _medium_type_b(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type B: missing package (requirements.txt) + deploy flag off (deploy_config.yml).

	    pipeline.yaml lists the stages install -> config_validate -> smoke_test.

	    Args:
	        rng: Seeded generator; consumed by one ``choice`` call that picks
	            the package to leave out.
	        all_packages: Candidate package lines for requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: requirements.txt -> removed package,
	        deploy_config.yml -> "deploy_enabled: true").
	    """
	    dropped_pkg = rng.choice(all_packages)
	    kept_pkgs = [pkg for pkg in all_packages if pkg != dropped_pkg]

	    deploy_lines = [
	        "target_env: production",
	        "deploy_enabled: false",  # injected fault: the answer key expects true
	        "replicas: 2",
	        "health_check_path: /health",
	        "timeout: 30",
	    ]

	    filesystem = {
	        "requirements.txt": "\n".join(kept_pkgs) + "\n",
	        "deploy_config.yml": "\n".join(deploy_lines) + "\n",
	        "pipeline.yaml": "stages:\n - install\n - config_validate\n - smoke_test\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        "requirements.txt": dropped_pkg,
	        "deploy_config.yml": "deploy_enabled: true",
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def _medium_type_c(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type C: wrong test command (Makefile) + missing env var (.env.ci).

	    pipeline.yaml lists the stages install -> env_check -> test.

	    Args:
	        rng: Seeded generator; consumed by two ``choice`` calls (test
	            command first, then the env var to omit).
	        all_packages: Package lines written to requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: .env.ci -> omitted var,
	        Makefile -> "python -m pytest tests/").
	    """
	    bad_command = rng.choice(WRONG_TEST_COMMANDS)
	    dropped_var = rng.choice(REQUIRED_ENV_VARS)

	    # Remaining variables, alphabetically, each with a dummy value.
	    kept_names = sorted({name for name in REQUIRED_ENV_VARS if name != dropped_var})
	    env_ci_text = "".join(f"{name}=placeholder\n" for name in kept_names)

	    # The recipe line is tab-indented, as make requires.
	    makefile_text = f".PHONY: test\ntest:\n\t{bad_command}\n"

	    filesystem = {
	        "requirements.txt": "\n".join(all_packages) + "\n",
	        ".env.ci": env_ci_text,
	        "Makefile": makefile_text,
	        "pipeline.yaml": "stages:\n - install\n - env_check\n - test\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        ".env.ci": dropped_var,
	        "Makefile": "python -m pytest tests/",
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def _medium_type_d(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type D: wrong port (service.yaml) + wrong Python version (Dockerfile).

	    pipeline.yaml lists the stages install -> port_check -> docker_build.
	    requirements.txt contains every package, so nothing is missing there.

	    Args:
	        rng: Seeded generator; consumed by two ``choice`` calls (Python
	            version first, then the port).
	        all_packages: Package lines written to requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: service.yaml -> "port: 8080",
	        Dockerfile -> "python:3.11").
	    """
	    bad_python = rng.choice(PYTHON_VERSIONS)
	    bad_port = rng.choice(WRONG_PORTS)

	    dockerfile_lines = [
	        f"FROM python:{bad_python}-slim",
	        "WORKDIR /app",
	        "COPY requirements.txt .",
	        "RUN pip install -r requirements.txt",
	        "COPY . .",
	        'CMD ["python", "app.py"]',
	    ]
	    service_lines = [
	        "apiVersion: v1",
	        "kind: Service",
	        "metadata:",
	        " name: app",
	        "spec:",
	        f" port: {bad_port}",
	    ]

	    filesystem = {
	        "requirements.txt": "\n".join(all_packages) + "\n",
	        "Dockerfile": "\n".join(dockerfile_lines) + "\n",
	        "service.yaml": "\n".join(service_lines) + "\n",
	        "pipeline.yaml": "stages:\n - install\n - port_check\n - docker_build\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        "service.yaml": "port: 8080",
	        "Dockerfile": "python:3.11",
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def generate_medium_scenario(seed: int) -> dict:
	    """
	    Build one of the four medium scenario variants (A-D).

	    A single ``random.Random(seed)`` first picks the variant and is then
	    handed to the variant builder for its own draws, so one seed fixes both
	    the variant and its details.

	    Args:
	        seed: Seed for the private random generator.

	    Returns:
	        The dict produced by the selected ``_medium_type_*`` builder.
	    """
	    rng = random.Random(seed)
	    packages = get_packages("medium")

	    # Insertion order matters: the variant letter is drawn from A, B, C, D.
	    builders = {
	        "A": _medium_type_a,
	        "B": _medium_type_b,
	        "C": _medium_type_c,
	        "D": _medium_type_d,
	    }
	    variant = rng.choice(list(builders))
	    return builders[variant](rng, packages)


	def _hard_type_a(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type A: ci.yml stage ordering + alpine Dockerfile base + old numpy pin.

	    Injected faults and the fixes recorded in the answer key:
	    - ci.yml lists the stages as test, build, install
	      (fix: install, build, test). The file is written either inline or
	      as a YAML list, decided by a single ``rng.random()`` draw.
	    - Dockerfile starts ``FROM python:3.11-alpine`` (fix: python:3.11-slim).
	    - requirements.txt pins ``numpy==1.21`` (fix: numpy==1.26).

	    pipeline.yaml lists the stages ci_validate -> docker_build -> install.
	    The faults are meant to surface one after another as earlier ones are
	    fixed; how each stage reacts to these files is decided outside this
	    function.

	    Args:
	        rng: Seeded generator; exactly one ``random()`` value is consumed.
	        all_packages: Package lines for requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"`` for ci.yml, Dockerfile and requirements.txt).
	    """
	    # Swap the plain "numpy" entry for the old pin; other entries pass through.
	    # NOTE(review): only an entry exactly equal to "numpy" is replaced - confirm
	    # that get_packages("hard") lists numpy without a version specifier.
	    requirements_lines = [
	        "numpy==1.21" if pkg == "numpy" else pkg
	        for pkg in all_packages
	    ]

	    # Randomize the ci.yml format (inline vs YAML list); the answer key
	    # uses the same format as the broken file.
	    if rng.random() < 0.5:
	        ci_content = "stages: test, build, install\n"
	        ci_fix = "stages: install, build, test\n"
	    else:
	        ci_content = (
	            "stages:\n"
	            " - test\n"
	            " - build\n"
	            " - install\n"
	        )
	        ci_fix = (
	            "stages:\n"
	            " - install\n"
	            " - build\n"
	            " - test\n"
	        )

	    return {
	        "filesystem": {
	            "requirements.txt": "\n".join(requirements_lines) + "\n",
	            "Dockerfile": (
	                "FROM python:3.11-alpine\n"  # injected fault: alpine base
	                "WORKDIR /app\n"
	                "COPY requirements.txt .\n"
	                "RUN pip install -r requirements.txt\n"
	                "COPY . .\n"
	                'CMD ["python", "app.py"]\n'
	            ),
	            "ci.yml": ci_content,
	            "pipeline.yaml": "stages:\n - ci_validate\n - docker_build\n - install\n",
	            "app.py": "import flask\nimport numpy\n# app code here\n",
	            "logs/install.log": "",
	        },
	        "answer_key": {
	            "fixes": {
	                "ci.yml": ci_fix,
	                "Dockerfile": "python:3.11-slim",
	                "requirements.txt": "numpy==1.26",
	            },
	        },
	    }


	def _hard_type_b(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type B: ci.yml stage ordering + missing env var + wrong test command.

	    pipeline.yaml lists the stages ci_validate -> env_check -> test.
	    The faults are meant to surface one after another as earlier ones are
	    fixed; how each stage reacts is decided outside this function.

	    Args:
	        rng: Seeded generator; consumed in this order: test command choice,
	            env var choice, one ``random()`` draw for the ci.yml format.
	        all_packages: Package lines written to requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"`` for ci.yml, .env.ci and Makefile).
	    """
	    bad_command = rng.choice(WRONG_TEST_COMMANDS)
	    dropped_var = rng.choice(REQUIRED_ENV_VARS)

	    # Remaining variables, alphabetically, each with a dummy value.
	    kept_names = sorted({name for name in REQUIRED_ENV_VARS if name != dropped_var})
	    env_ci_text = "".join(f"{name}=placeholder\n" for name in kept_names)

	    broken_order = ("test", "build", "install")
	    fixed_order = ("install", "build", "test")
	    if rng.random() < 0.5:
	        # Inline, comma-separated form.
	        ci_broken = "stages: " + ", ".join(broken_order) + "\n"
	        ci_fixed = "stages: " + ", ".join(fixed_order) + "\n"
	    else:
	        # YAML list form, one stage per line.
	        ci_broken = "stages:\n" + "".join(f" - {stage}\n" for stage in broken_order)
	        ci_fixed = "stages:\n" + "".join(f" - {stage}\n" for stage in fixed_order)

	    filesystem = {
	        "requirements.txt": "\n".join(all_packages) + "\n",
	        "ci.yml": ci_broken,
	        ".env.ci": env_ci_text,
	        # The recipe line is tab-indented, as make requires.
	        "Makefile": f".PHONY: test\ntest:\n\t{bad_command}\n",
	        "pipeline.yaml": "stages:\n - ci_validate\n - env_check\n - test\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        "ci.yml": ci_fixed,
	        ".env.ci": dropped_var,
	        "Makefile": "python -m pytest tests/",
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def _hard_type_c(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type C: alpine Dockerfile base + deploy disabled + wrong service port.

	    pipeline.yaml lists the stages docker_build -> config_validate -> port_check.

	    Args:
	        rng: Seeded generator; consumed by one ``choice`` call that picks
	            the wrong port.
	        all_packages: Package lines written to requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: Dockerfile -> "python:3.11-slim",
	        deploy_config.yml -> "deploy_enabled: true",
	        service.yaml -> "port: 8080").
	    """
	    # The port is the only per-seed variation in this variant.
	    wrong_port = rng.choice(WRONG_PORTS)

	    return {
	        "filesystem": {
	            "requirements.txt": "\n".join(all_packages) + "\n",
	            "Dockerfile": (
	                "FROM python:3.11-alpine\n"  # injected fault: alpine base
	                "WORKDIR /app\n"
	                "COPY requirements.txt .\n"
	                "RUN pip install -r requirements.txt\n"
	                "COPY . .\n"
	                'CMD ["python", "app.py"]\n'
	            ),
	            "deploy_config.yml": (
	                "target_env: production\n"
	                "deploy_enabled: false\n"  # injected fault: answer key expects true
	                "replicas: 2\n"
	                "health_check_path: /health\n"
	                "timeout: 30\n"
	            ),
	            "service.yaml": (
	                "apiVersion: v1\n"
	                "kind: Service\n"
	                "metadata:\n"
	                " name: app\n"
	                "spec:\n"
	                f" port: {wrong_port}\n"  # injected fault: answer key expects 8080
	            ),
	            "pipeline.yaml": "stages:\n - docker_build\n - config_validate\n - port_check\n",
	            "app.py": "import flask\n# app code here\n",
	            "logs/install.log": "",
	        },
	        "answer_key": {
	            "fixes": {
	                "Dockerfile": "python:3.11-slim",
	                "deploy_config.yml": "deploy_enabled: true",
	                "service.yaml": "port: 8080",
	            },
	        },
	    }


	def _hard_type_d(rng: random.Random, all_packages: list) -> dict:
	    """
	    Type D: missing package + missing env var + alpine Dockerfile base.

	    pipeline.yaml lists the stages install -> env_check -> docker_build.

	    Args:
	        rng: Seeded generator; consumed by two ``choice`` calls (package to
	            remove first, then the env var to omit).
	        all_packages: Candidate package lines for requirements.txt.

	    Returns:
	        Dict with ``"filesystem"`` (path -> content) and ``"answer_key"``
	        (``"fixes"``: requirements.txt -> removed package,
	        .env.ci -> omitted var, Dockerfile -> "python:3.11-slim").
	    """
	    dropped_pkg = rng.choice(all_packages)
	    kept_pkgs = [pkg for pkg in all_packages if pkg != dropped_pkg]

	    dropped_var = rng.choice(REQUIRED_ENV_VARS)
	    # Remaining variables, alphabetically, each with a dummy value.
	    kept_names = sorted({name for name in REQUIRED_ENV_VARS if name != dropped_var})
	    env_ci_text = "".join(f"{name}=placeholder\n" for name in kept_names)

	    dockerfile_lines = [
	        "FROM python:3.11-alpine",
	        "WORKDIR /app",
	        "COPY requirements.txt .",
	        "RUN pip install -r requirements.txt",
	        "COPY . .",
	        'CMD ["python", "app.py"]',
	    ]

	    filesystem = {
	        "requirements.txt": "\n".join(kept_pkgs) + "\n",
	        ".env.ci": env_ci_text,
	        "Dockerfile": "\n".join(dockerfile_lines) + "\n",
	        "pipeline.yaml": "stages:\n - install\n - env_check\n - docker_build\n",
	        "app.py": "import flask\n# app code here\n",
	        "logs/install.log": "",
	    }
	    fixes = {
	        "requirements.txt": dropped_pkg,
	        ".env.ci": dropped_var,
	        "Dockerfile": "python:3.11-slim",
	    }
	    return {"filesystem": filesystem, "answer_key": {"fixes": fixes}}


	def generate_hard_scenario(seed: int) -> dict:
	    """
	    Build one of the four hard scenario variants (A-D).

	    Each variant carries three faults with three matching answer-key fixes.
	    A single ``random.Random(seed)`` first picks the variant and is then
	    handed to the variant builder for its own draws, so one seed fixes both
	    the variant and its details.

	    Args:
	        seed: Seed for the private random generator.

	    Returns:
	        The dict produced by the selected ``_hard_type_*`` builder.
	    """
	    rng = random.Random(seed)
	    packages = get_packages("hard")

	    # Insertion order matters: the variant letter is drawn from A, B, C, D.
	    builders = {
	        "A": _hard_type_a,
	        "B": _hard_type_b,
	        "C": _hard_type_c,
	        "D": _hard_type_d,
	    }
	    variant = rng.choice(list(builders))
	    return builders[variant](rng, packages)