Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Baseline inference script for the Self-Healing DevOps Sandbox. | |
| Uses an LLM (via the OpenAI-compatible API) to diagnose and fix a broken | |
| Node.js backend running inside a Docker container. | |
| Usage: | |
| export OPENAI_API_KEY="sk-..." | |
| python baseline.py | |
| # Or with a custom endpoint (e.g., local vLLM): | |
| export OPENAI_BASE_URL="http://localhost:8080/v1" | |
| python baseline.py | |
| """ | |
| import json | |
| import os | |
| import sys | |
| try: | |
| from openai import OpenAI | |
| except ImportError: | |
| print("ERROR: 'openai' package is required. Install with: pip install openai") | |
| sys.exit(1) | |
| from devops_sandbox import BashAction, DevopsSandboxEnv | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Base URL of the sandbox HTTP server hosting DevopsSandboxEnv.
ENV_URL = os.getenv("DEVOPS_SANDBOX_URL", "http://localhost:8000")
# OpenAI-compatible model name; override for local deployments (e.g. vLLM).
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
# Hard cap on agent turns per episode before giving up.
MAX_TURNS = int(os.getenv("MAX_TURNS", "30"))
# System prompt sent once at the start of the conversation.
# NOTE(review): the original text contained mojibake ("β") where arrow
# characters were intended in the success criteria; restored to "→" here so
# the LLM sees readable expectations.
SYSTEM_PROMPT = """\
You are an expert DevOps engineer and Node.js developer.
You have been dropped into a Linux container with a broken Express.js backend in /app.
Your goal is to diagnose and fix ALL bugs so the app runs correctly.
RULES:
1. Respond ONLY with a JSON object: {"command": "<bash command>"}
2. Use standard bash/Linux commands (ls, cat, grep, sed, node, npm, etc.)
3. Do NOT use interactive editors (vi, nano). Use sed or echo/cat with redirection.
4. After fixing bugs, restart the app with: cd /app && npm start &
5. Be methodical: read files first, understand the bug, then fix it.
EXPECTED FINAL STATE:
- App starts without errors on port 3000
- GET /health → 200
- GET /api/users → 200 with JSON containing "users" array
- GET /api/data → 200 with JSON containing "records" array
"""
def extract_command(llm_response: str) -> str:
    """Extract a bash command from the LLM's response.

    Accepted shapes, tried in order:
      1. The whole response is a JSON object with a "command" key.
      2. A fenced code block: ```json (JSON payload), ```bash/sh/shell
         (shell payload), or an untagged block (first line, or a JSON
         payload if the block starts with "{").
      3. Fallback: the raw response, with stray backticks stripped.

    Returns the extracted command string (possibly empty).
    """
    text = llm_response.strip()

    # 1. Whole response is a JSON object.
    try:
        data = json.loads(text)
        if isinstance(data, dict) and "command" in data:
            return data["command"]
    except (json.JSONDecodeError, TypeError):
        pass

    # 2. Fenced code blocks: odd-indexed segments of a ``` split.
    if "```" in llm_response:
        for block in llm_response.split("```")[1::2]:
            code = block.strip()
            # A language tag, if present, is the first line of the block.
            # Match the tag exactly: the old prefix test startswith("sh")
            # misfired on commands like "shutdown", dropping their first line.
            tag, _, rest = code.partition("\n")
            tag = tag.strip().lower()
            if tag == "json" or code.startswith("{"):
                # JSON payload, tagged or untagged (untagged JSON used to be
                # returned verbatim as a "command").
                payload = rest.strip() if tag == "json" else code
                try:
                    data = json.loads(payload)
                    if isinstance(data, dict) and "command" in data:
                        return data["command"]
                except (json.JSONDecodeError, TypeError):
                    pass
            elif tag in ("bash", "sh", "shell"):
                # Tagged shell block: everything after the tag line.
                body = rest.strip()
                if body:
                    return body
            else:
                # Untagged block: treat the first non-empty line as the command.
                first_line = code.split("\n")[0].strip()
                if first_line:
                    return first_line

    # 3. Fallback: strip stray backticks; give JSON one last chance.
    cmd = text.strip("`").strip()
    if cmd.startswith("{"):
        try:
            return json.loads(cmd)["command"]
        except Exception:
            pass
    return cmd
def main() -> None:
    """Run the baseline LLM agent loop against the DevOps sandbox.

    Resets the environment, then alternates LLM completion -> extracted bash
    command -> environment step for up to MAX_TURNS turns, stopping early
    when the episode reports done.  The full trajectory (commands, output,
    grader feedback) is accumulated in ``messages`` so the model sees its
    history each turn.  Requires OPENAI_API_KEY (and optionally
    OPENAI_BASE_URL) in the environment.
    """
    print("=" * 60)
    # NOTE(review): banner/separator/checkmark glyphs below were mojibake
    # ("β") in the original; restored to the intended characters.
    print(" Self-Healing DevOps Sandbox — Baseline Agent")
    print("=" * 60)

    # OpenAI() picks up OPENAI_API_KEY / OPENAI_BASE_URL from the environment.
    client = OpenAI()
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    with DevopsSandboxEnv(base_url=ENV_URL).sync() as env:
        # Reset the environment to its initial broken state.
        print("\n[*] Resetting environment...")
        result = env.reset()
        obs = result.observation
        # obs.stdout may be empty/None; guard like the in-loop previews do.
        print(f"\n[INIT] Task prompt:\n{(obs.stdout or '')[:500]}...")
        print(f"[INIT] Score: {obs.grader_score} | Feedback: {obs.grader_feedback}")

        # Seed the conversation with the initial observation.
        messages.append({
            "role": "user",
            "content": (
                f"Here is the initial state of the broken app:\n\n"
                f"```\n{obs.stdout}\n```\n\n"
                f"Current directory: {obs.current_dir}\n"
                f"Score: {obs.grader_score}/1.0\n\n"
                f"What bash command should I run first?"
            ),
        })

        for turn in range(1, MAX_TURNS + 1):
            print(f"\n{'─' * 40}")
            print(f"Turn {turn}/{MAX_TURNS}")
            print(f"{'─' * 40}")

            # Ask the LLM for the next command.  An API failure ends the
            # run; there is no point continuing without a policy.
            try:
                response = client.chat.completions.create(
                    model=MODEL,
                    messages=messages,
                    temperature=0.2,
                    max_tokens=256,
                )
                llm_text = response.choices[0].message.content or ""
            except Exception as e:
                print(f"[ERROR] LLM call failed: {e}")
                break

            # Extract a bash command; fall back to a harmless listing so the
            # episode keeps producing observations.
            command = extract_command(llm_text)
            if not command:
                print("[WARN] Could not extract command from LLM response")
                command = "ls -la /app"
            print(f"[CMD] {command}")

            # Execute the command in the sandbox.
            result = env.step(BashAction(command=command))
            obs = result.observation
            stdout_preview = obs.stdout[:300] if obs.stdout else "(empty)"
            stderr_preview = obs.stderr[:200] if obs.stderr else "(none)"
            print(f"[OUT] {stdout_preview}")
            if obs.stderr:
                print(f"[ERR] {stderr_preview}")
            print(f"[SCORE] {obs.grader_score:.2f} | {obs.grader_feedback}")

            # Record the exchange so the model sees the full trajectory.
            messages.append({"role": "assistant", "content": llm_text})
            messages.append({
                "role": "user",
                "content": (
                    f"Command output:\n"
                    f"stdout:\n```\n{obs.stdout}\n```\n"
                    f"stderr:\n```\n{obs.stderr}\n```\n"
                    f"Current score: {obs.grader_score}/1.0\n"
                    f"Grader feedback: {obs.grader_feedback}\n\n"
                    f"What command should I run next?"
                ),
            })

            # Stop as soon as the environment declares the episode over.
            if result.done:
                print(f"\n{'=' * 60}")
                if obs.grader_score >= 1.0:
                    print(" ✓ ALL BUGS FIXED — PERFECT SCORE!")
                else:
                    print(f" Episode ended. Final score: {obs.grader_score:.2f}/1.0")
                print(f"{'=' * 60}")
                break
        else:
            # for/else: reached only when the loop exhausted MAX_TURNS
            # without breaking.
            print(f"\n[!] Max turns ({MAX_TURNS}) reached.")
            print(f"    Final score: {obs.grader_score:.2f}/1.0")

    print("\n[*] Done.")


if __name__ == "__main__":
    main()