#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Baseline inference script for the Self-Healing DevOps Sandbox.

Uses an LLM (via the OpenAI-compatible API) to diagnose and fix a broken
Node.js backend running inside a Docker container.

Usage:
    export OPENAI_API_KEY="sk-..."
    python baseline.py

    # Or with a custom endpoint (e.g., local vLLM):
    export OPENAI_BASE_URL="http://localhost:8080/v1"
    python baseline.py

Optional environment variables read by this script:
    DEVOPS_SANDBOX_URL  Base URL of the sandbox server
                        (default: http://localhost:8000).
    OPENAI_MODEL        Chat model name (default: gpt-4o-mini).
    MAX_TURNS           Maximum number of agent turns (default: 30).
"""

import json
import os
import sys
from typing import Optional

try:
    from openai import OpenAI
except ImportError:
    print("ERROR: 'openai' package is required. Install with: pip install openai")
    sys.exit(1)

from devops_sandbox import BashAction, DevopsSandboxEnv

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

ENV_URL = os.getenv("DEVOPS_SANDBOX_URL", "http://localhost:8000")
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
MAX_TURNS = int(os.getenv("MAX_TURNS", "30"))

SYSTEM_PROMPT = """\
You are an expert DevOps engineer and Node.js developer.
You have been dropped into a Linux container with a broken Express.js backend in /app.
Your goal is to diagnose and fix ALL bugs so the app runs correctly.

RULES:
1. Respond ONLY with a JSON object: {"command": "<your bash command>"}
2. Use standard bash/Linux commands (ls, cat, grep, sed, node, npm, etc.)
3. Do NOT use interactive editors (vi, nano). Use sed or echo/cat with redirection.
4. After fixing bugs, restart the app with: cd /app && npm start &
5. Be methodical: read files first, understand the bug, then fix it.

EXPECTED FINAL STATE:
- App starts without errors on port 3000
- GET /health → 200
- GET /api/users → 200 with JSON containing "users" array
- GET /api/data → 200 with JSON containing "records" array
"""

# Fence language tags that mark a code block as a shell script.
_SHELL_TAGS = frozenset({"bash", "sh", "shell", "zsh"})

# Characters that may directly follow a leading "json" fence tag.
_JSON_TAG_TERMINATORS = " \t\r\n{["


def _command_field(data: object) -> Optional[str]:
    """Return ``data["command"]`` if *data* is a dict holding a string there."""
    if isinstance(data, dict):
        command = data.get("command")
        if isinstance(command, str):
            return command
    return None


def _command_from_json(text: str) -> Optional[str]:
    """Parse *text* as a single JSON document and return its command, if any."""
    try:
        data = json.loads(text)
    except (json.JSONDecodeError, TypeError):
        return None
    return _command_field(data)


def _command_from_fences(text: str) -> Optional[str]:
    """Return the command found in the first usable ``` fenced block, if any."""
    # Splitting on the fence marker leaves the fenced contents at odd indices.
    for block in text.split("```")[1::2]:
        code = block.strip()
        if not code:
            continue

        first_line, newline, rest = code.partition("\n")
        tag = first_line.strip().lower()

        # A shell tag only counts when it is the whole first line; a prefix
        # test would also swallow real commands such as "shasum" or "shutdown".
        if newline and tag in _SHELL_TAGS:
            script = rest.strip()
            if script:
                return script
            continue

        is_json_tagged = code[:4].lower() == "json" and (
            len(code) == 4 or code[4] in _JSON_TAG_TERMINATORS
        )
        if is_json_tagged:
            command = _command_from_json(code[4:].strip())
            if command is not None:
                return command
            # A JSON-tagged block without a usable command is not a script.
            continue

        # Untagged block: it may still hold the JSON object we asked for.
        command = _command_from_json(code)
        if command is not None:
            return command

        # Otherwise treat the first line of the block as the command.
        return first_line.strip()
    return None


def _command_from_embedded_json(text: str) -> Optional[str]:
    """Return the command of the first JSON object embedded in prose, if any."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            data, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            pass
        else:
            command = _command_field(data)
            if command is not None:
                return command
        start = text.find("{", start + 1)
    return None


def extract_command(llm_response: str) -> str:
    """Extract a bash command from the LLM's response (JSON or raw text).

    Strategies are tried in order, and the first one that yields a string
    "command" wins:

    1. The whole response is a JSON object with a "command" key.
    2. A ``` fenced block: shell-tagged blocks are returned whole, JSON blocks
       (tagged or not) are parsed, and any other block yields its first line.
    3. A JSON object with a "command" key embedded in surrounding prose.
    4. Fallback: the response itself with surrounding backticks removed.

    Args:
        llm_response: Raw text returned by the model.

    Returns:
        The command string. It is empty when the response is empty or the
        model explicitly returned an empty command.
    """
    text = (llm_response or "").strip()
    if not text:
        return ""

    strategies = (
        _command_from_json,
        _command_from_fences,
        _command_from_embedded_json,
    )
    for strategy in strategies:
        command = strategy(text)
        if command is not None:
            return command

    # Fallback: treat entire response as a command
    return text.strip("`").strip()


def main() -> None:
    """Run one episode: let the LLM issue bash commands until done or out of turns.

    Each turn sends the conversation so far to the chat model, extracts a
    command from the reply, executes it in the sandbox environment, and feeds
    the resulting stdout/stderr, score and grader feedback back to the model.
    The loop stops when the environment reports ``done``, when the LLM call
    raises, or after ``MAX_TURNS`` turns.
    """
    print("=" * 60)
    print(" Self-Healing DevOps Sandbox — Baseline Agent")
    print("=" * 60)

    client = OpenAI()
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    with DevopsSandboxEnv(base_url=ENV_URL).sync() as env:
        # Reset the environment
        print("\n[*] Resetting environment...")
        result = env.reset()
        obs = result.observation

        print(f"\n[INIT] Task prompt:\n{obs.stdout[:500]}...")
        print(f"[INIT] Score: {obs.grader_score} | Feedback: {obs.grader_feedback}")

        # Add initial observation to messages
        messages.append({
            "role": "user",
            "content": (
                f"Here is the initial state of the broken app:\n\n"
                f"```\n{obs.stdout}\n```\n\n"
                f"Current directory: {obs.current_dir}\n"
                f"Score: {obs.grader_score}/1.0\n\n"
                f"What bash command should I run first?"
            ),
        })

        for turn in range(1, MAX_TURNS + 1):
            print(f"\n{'─' * 40}")
            print(f"Turn {turn}/{MAX_TURNS}")
            print(f"{'─' * 40}")

            # Get LLM response
            try:
                response = client.chat.completions.create(
                    model=MODEL,
                    messages=messages,
                    temperature=0.2,
                    max_tokens=256,
                )
                llm_text = response.choices[0].message.content or ""
            except Exception as e:
                # Top-level boundary: report the failure and end the episode.
                print(f"[ERROR] LLM call failed: {e}")
                break

            # Extract command
            command = extract_command(llm_text)
            if not command:
                # Keep the episode moving with a harmless listing.
                print("[WARN] Could not extract command from LLM response")
                command = "ls -la /app"

            print(f"[CMD] {command}")

            # Execute in environment
            result = env.step(BashAction(command=command))
            obs = result.observation

            stdout_preview = obs.stdout[:300] if obs.stdout else "(empty)"
            stderr_preview = obs.stderr[:200] if obs.stderr else "(none)"
            print(f"[OUT] {stdout_preview}")
            if obs.stderr:
                print(f"[ERR] {stderr_preview}")
            print(f"[SCORE] {obs.grader_score:.2f} | {obs.grader_feedback}")

            # Add to conversation
            # NOTE(review): the full stdout/stderr is appended every turn, so
            # the conversation grows without bound; consider truncating if the
            # endpoint starts rejecting long contexts.
            messages.append({"role": "assistant", "content": llm_text})
            messages.append({
                "role": "user",
                "content": (
                    f"Command output:\n"
                    f"stdout:\n```\n{obs.stdout}\n```\n"
                    f"stderr:\n```\n{obs.stderr}\n```\n"
                    f"Current score: {obs.grader_score}/1.0\n"
                    f"Grader feedback: {obs.grader_feedback}\n\n"
                    f"What command should I run next?"
                ),
            })

            # Check if done
            if result.done:
                print(f"\n{'=' * 60}")
                if obs.grader_score >= 1.0:
                    print(" ✅ ALL BUGS FIXED — PERFECT SCORE!")
                else:
                    print(f" Episode ended. Final score: {obs.grader_score:.2f}/1.0")
                print(f"{'=' * 60}")
                break
        else:
            # Runs only when the loop finished without a break.
            print(f"\n[!] Max turns ({MAX_TURNS}) reached.")
            print(f" Final score: {obs.grader_score:.2f}/1.0")

    print("\n[*] Done.")


if __name__ == "__main__":
    main()