# sre-openenv/server/tasks/medium_build.py
# Dragonfire146's picture
# fix(submission): adjust grader scores and trajectory rewards to [0.001, 0.999] range
# c3d4615
"""
Medium Task: Build Configuration
Scenario: A CI/CD pipeline is failing because a Python project is
missing critical dependencies in its requirements.txt. The agent
must read build error logs, identify the missing packages, and
patch the requirements file to fix the build.
Setup: Creates a Python project structure with a Flask app, test suite,
and a requirements.txt that is deliberately missing 'flask'.
Grader: Run the build script — exit 0 = pass, else fail.
"""
from __future__ import annotations
import os
import stat
import subprocess
import textwrap
# Root directory that holds every file belonging to this task.
WORKSPACE = "/tmp/sre_tasks/medium_build"

# Individual artefacts, all located underneath WORKSPACE.
BUILD_SCRIPT = WORKSPACE + "/run_build.sh"
REQUIREMENTS = WORKSPACE + "/requirements.txt"
APP_FILE = WORKSPACE + "/app.py"
TEST_FILE = WORKSPACE + "/tests/test_app.py"
BUILD_LOG = WORKSPACE + "/build_output.log"
# Incident briefing for this task: states the alert, the impact, and the
# three steps the agent is expected to carry out.
# NOTE(review): presumably consumed elsewhere as the agent's system prompt;
# the consumer is not visible in this file.
# NOTE(review): the workspace path is spelled out literally in this text
# rather than derived from WORKSPACE, so the two must be kept in sync by hand.
SYSTEM_PROMPT = textwrap.dedent("""\
You are an SRE agent debugging a CI/CD pipeline failure.
INCIDENT REPORT:
- Alert: Build Pipeline FAILED — Stage "install & test" exited non-zero.
- Impact: No new deployments can ship until the build is green.
- Build workspace: /tmp/sre_tasks/medium_build/
Your task:
1. Read the build error log at /tmp/sre_tasks/medium_build/build_output.log
2. Examine the project files to understand what's wrong.
3. Fix the issue so that running `bash /tmp/sre_tasks/medium_build/run_build.sh`
exits with code 0 (success).
The project is a Python Flask web app with a test suite.
The build script installs dependencies and runs tests.
""")
def _write_text(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)


def setup() -> str:
    """
    Set up the medium_build task.

    Recreates the workspace from scratch and populates it with a Flask
    project whose requirements.txt is deliberately broken (missing the
    'flask' dependency), an executable build script, and a pre-generated
    build failure log.

    Returns:
        The initial observation message.
    """
    import shutil  # local import: only this function needs it

    # Clean workspace: drop anything left over from a previous run (caches,
    # files added by an earlier episode) so every episode starts identically.
    shutil.rmtree(WORKSPACE, ignore_errors=True)
    os.makedirs(f"{WORKSPACE}/tests", exist_ok=True)

    # Write the Flask app
    app_code = textwrap.dedent("""\
        from flask import Flask, jsonify
        app = Flask(__name__)
        @app.route("/api/status")
        def status():
            return jsonify({"status": "ok", "version": "1.2.0"})
        @app.route("/api/users")
        def users():
            return jsonify({"users": ["alice", "bob", "charlie"]})
        if __name__ == "__main__":
            app.run(host="0.0.0.0", port=5000)
    """)
    _write_text(APP_FILE, app_code)

    # Write the test suite
    test_code = textwrap.dedent("""\
        import pytest
        from app import app
        @pytest.fixture
        def client():
            app.config["TESTING"] = True
            with app.test_client() as client:
                yield client
        def test_status_endpoint(client):
            response = client.get("/api/status")
            assert response.status_code == 200
            data = response.get_json()
            assert data["status"] == "ok"
        def test_users_endpoint(client):
            response = client.get("/api/users")
            assert response.status_code == 200
            data = response.get_json()
            assert len(data["users"]) == 3
    """)
    _write_text(TEST_FILE, test_code)

    # Write BROKEN requirements.txt — missing flask!
    broken_requirements = textwrap.dedent("""\
        # Project dependencies
        pytest>=7.0
        requests>=2.28.0
    """)
    _write_text(REQUIREMENTS, broken_requirements)

    # Write the build script; `set -e` makes the first failing command
    # (pip or pytest) decide the script's exit status.
    build_script = textwrap.dedent(f"""\
        #!/bin/bash
        set -e
        cd {WORKSPACE}
        pip install --quiet -r requirements.txt 2>&1
        cd {WORKSPACE} && python -m pytest tests/ -v 2>&1
        echo "BUILD SUCCESSFUL"
    """)
    _write_text(BUILD_SCRIPT, build_script)
    # Add the owner-execute bit on top of whatever mode the file received.
    os.chmod(BUILD_SCRIPT, os.stat(BUILD_SCRIPT).st_mode | stat.S_IEXEC)

    # Generate a realistic build failure log.
    # NOTE(review): the log is canned text, not real output; its paths use
    # /workspace rather than WORKSPACE — confirm that this is intentional.
    build_log = textwrap.dedent("""\
        ===== CI/CD Build Pipeline =====
        Stage: install & test
        Timestamp: 2026-03-25 14:22:07 UTC
        [1/2] Installing dependencies from requirements.txt...
        Successfully installed pytest-7.4.0 requests-2.31.0
        [2/2] Running test suite...
        ============================= test session starts ==============================
        collected 0 items / 2 errors
        _____________________________ ERROR collecting tests/test_app.py ______________________________
        ImportError while importing test module '/workspace/tests/test_app.py'.
        Hint: make sure your test modules/packages have valid Python names.
        Traceback (most recent call last):
        File "/workspace/tests/test_app.py", line 2, in <module>
        from app import app
        File "/workspace/app.py", line 1, in <module>
        from flask import Flask, jsonify
        ModuleNotFoundError: No module named 'flask'
        =========================== short test summary info ============================
        ERROR tests/test_app.py - ModuleNotFoundError: No module named 'flask'
        ============================== 2 errors in 0.12s ==============================
        BUILD FAILED — exit code 1
    """)
    _write_text(BUILD_LOG, build_log)

    return (
        "ALERT: Build Pipeline FAILED — Stage 'install & test' exited non-zero.\n"
        "Build log: /tmp/sre_tasks/medium_build/build_output.log\n"
        "Build script: /tmp/sre_tasks/medium_build/run_build.sh\n"
        "Diagnose and fix the build failure."
    )
def grade() -> float:
    """
    Grade the medium_build task.

    Runs the build script with bash from inside the workspace and scores
    on its exit status alone.

    Returns:
        0.95 if the script exits 0; 0.05 if it exits non-zero, exceeds the
        60-second timeout, or cannot be started at all.
    """
    try:
        result = subprocess.run(
            ["bash", BUILD_SCRIPT],
            # Only the exit status matters, so the output is discarded
            # instead of captured: nothing is buffered or decoded, which
            # means odd bytes in pip/pytest output cannot fail the grade.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=60,
            cwd=WORKSPACE,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: bash or the workspace directory is missing.
        # SubprocessError: includes TimeoutExpired for a hung build.
        return 0.05
    return 0.95 if result.returncode == 0 else 0.05
def cleanup() -> None:
    """Tear down the medium_build task (intentionally a no-op).

    Build tasks leave no persistent processes behind, so there is nothing
    to stop here.
    """
    return None