Spaces:
Sleeping
Sleeping
| """ | |
| Medium Task: Build Configuration | |
| Scenario: A CI/CD pipeline is failing because a Python project is | |
| missing critical dependencies in its requirements.txt. The agent | |
| must read build error logs, identify the missing packages, and | |
| patch the requirements file to fix the build. | |
| Setup: Creates a Python project structure with a Flask app, test suite, | |
| and a requirements.txt that is deliberately missing 'flask'. | |
| Grader: Run the build script — exit 0 = pass, else fail. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import stat | |
| import subprocess | |
| import textwrap | |
# Filesystem layout of the task. Every path is derived from WORKSPACE so the
# whole task can be relocated by editing a single constant.
WORKSPACE = "/tmp/sre_tasks/medium_build"
BUILD_SCRIPT = "/".join((WORKSPACE, "run_build.sh"))
REQUIREMENTS = "/".join((WORKSPACE, "requirements.txt"))
APP_FILE = "/".join((WORKSPACE, "app.py"))
TEST_FILE = "/".join((WORKSPACE, "tests", "test_app.py"))
BUILD_LOG = "/".join((WORKSPACE, "build_output.log"))

# Instructions handed to the agent. Built line by line from the path
# constants above; each line is terminated with a newline.
SYSTEM_PROMPT = "".join(
    line + "\n"
    for line in (
        "You are an SRE agent debugging a CI/CD pipeline failure.",
        "INCIDENT REPORT:",
        '- Alert: Build Pipeline FAILED — Stage "install & test" exited non-zero.',
        "- Impact: No new deployments can ship until the build is green.",
        "- Build workspace: " + WORKSPACE + "/",
        "Your task:",
        "1. Read the build error log at " + BUILD_LOG,
        "2. Examine the project files to understand what's wrong.",
        "3. Fix the issue so that running `bash " + BUILD_SCRIPT + "`",
        "exits with code 0 (success).",
        "The project is a Python Flask web app with a test suite.",
        "The build script installs dependencies and runs tests.",
    )
)
def _write_text(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)


def setup() -> str:
    """
    Set up the medium_build task.

    Recreates the workspace from scratch, then writes a small Flask app,
    its pytest suite, a requirements.txt that deliberately omits 'flask',
    an executable build script, and a canned build-failure log.

    Returns:
        The initial observation message.
    """
    import shutil  # local import: only setup() needs it

    # Clean workspace: drop anything left behind by a previous episode
    # (extra modules, caches, agent edits) so every run starts identical.
    shutil.rmtree(WORKSPACE, ignore_errors=True)
    os.makedirs(f"{WORKSPACE}/tests", exist_ok=True)

    # Write the Flask app
    app_code = textwrap.dedent("""\
        from flask import Flask, jsonify

        app = Flask(__name__)


        @app.route("/api/status")
        def status():
            return jsonify({"status": "ok", "version": "1.2.0"})


        @app.route("/api/users")
        def users():
            return jsonify({"users": ["alice", "bob", "charlie"]})


        if __name__ == "__main__":
            app.run(host="0.0.0.0", port=5000)
    """)
    _write_text(APP_FILE, app_code)

    # Write the test suite
    test_code = textwrap.dedent("""\
        import pytest
        from app import app


        @pytest.fixture
        def client():
            app.config["TESTING"] = True
            with app.test_client() as client:
                yield client


        def test_status_endpoint(client):
            response = client.get("/api/status")
            assert response.status_code == 200
            data = response.get_json()
            assert data["status"] == "ok"


        def test_users_endpoint(client):
            response = client.get("/api/users")
            assert response.status_code == 200
            data = response.get_json()
            assert len(data["users"]) == 3
    """)
    _write_text(TEST_FILE, test_code)

    # Write BROKEN requirements.txt — missing flask!
    broken_requirements = textwrap.dedent("""\
        # Project dependencies
        pytest>=7.0
        requests>=2.28.0
    """)
    _write_text(REQUIREMENTS, broken_requirements)

    # Write the build script.
    # NOTE(review): `pip install` targets whichever Python environment is
    # active; if Flask is already installed there the build would pass
    # without any fix. Confirm the task runs in an isolated environment.
    build_script = textwrap.dedent(f"""\
        #!/bin/bash
        set -e
        cd {WORKSPACE}
        pip install --quiet -r requirements.txt 2>&1
        cd {WORKSPACE} && python -m pytest tests/ -v 2>&1
        echo "BUILD SUCCESSFUL"
    """)
    _write_text(BUILD_SCRIPT, build_script)
    # Add the owner-execute bit on top of the existing mode.
    os.chmod(BUILD_SCRIPT, os.stat(BUILD_SCRIPT).st_mode | stat.S_IEXEC)

    # Generate a realistic build failure log. The text is canned, not
    # produced by running the build.
    # NOTE(review): the log shows /workspace/ paths and "2 errors" although
    # the project lives under WORKSPACE and has one test module. Presumably
    # this imitates a CI container; confirm it is intentional.
    build_log = textwrap.dedent("""\
        ===== CI/CD Build Pipeline =====
        Stage: install & test
        Timestamp: 2026-03-25 14:22:07 UTC
        [1/2] Installing dependencies from requirements.txt...
        Successfully installed pytest-7.4.0 requests-2.31.0
        [2/2] Running test suite...
        ============================= test session starts ==============================
        collected 0 items / 2 errors
        _____________________________ ERROR collecting tests/test_app.py ______________________________
        ImportError while importing test module '/workspace/tests/test_app.py'.
        Hint: make sure your test modules/packages have valid Python names.
        Traceback (most recent call last):
          File "/workspace/tests/test_app.py", line 2, in <module>
            from app import app
          File "/workspace/app.py", line 1, in <module>
            from flask import Flask, jsonify
        ModuleNotFoundError: No module named 'flask'
        =========================== short test summary info ============================
        ERROR tests/test_app.py - ModuleNotFoundError: No module named 'flask'
        ============================== 2 errors in 0.12s ==============================
        BUILD FAILED — exit code 1
    """)
    _write_text(BUILD_LOG, build_log)

    return (
        "ALERT: Build Pipeline FAILED — Stage 'install & test' exited non-zero.\n"
        f"Build log: {BUILD_LOG}\n"
        f"Build script: {BUILD_SCRIPT}\n"
        "Diagnose and fix the build failure."
    )
def grade() -> float:
    """
    Grade the medium_build task.

    Runs the build script with bash, using the workspace as the working
    directory, and maps the outcome to a score.

    Returns:
        0.95 if the script exits with code 0. 0.05 if it exits non-zero,
        runs longer than 60 seconds, or cannot be launched at all.
    """
    try:
        result = subprocess.run(
            ["bash", BUILD_SCRIPT],
            # Output is captured only to keep it off the caller's console.
            # It is left as bytes because it is never read, so undecodable
            # output cannot turn a passing build into a failing grade.
            capture_output=True,
            timeout=60,
            cwd=WORKSPACE,
        )
    except (subprocess.SubprocessError, OSError):
        # Timeout (TimeoutExpired is a SubprocessError), or bash / the
        # workspace directory is missing (OSError): the build did not pass.
        return 0.05
    return 0.95 if result.returncode == 0 else 0.05
def cleanup() -> None:
    """Do nothing: build tasks leave no persistent processes to clean up."""
    return None