Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Baseline inference script for the Self-Healing DevOps Sandbox. | |
| Uses an LLM (via the OpenAI-compatible API) to diagnose and fix a broken | |
| Node.js backend running inside a Docker container. | |
| Usage: | |
| export OPENAI_API_KEY="sk-..." | |
| python baseline.py | |
| # Or with a custom endpoint (e.g., local vLLM): | |
| export OPENAI_BASE_URL="http://localhost:8080/v1" | |
| python baseline.py | |
| """ | |
| import json | |
| import os | |
| import sys | |
| try: | |
| from openai import OpenAI | |
| except ImportError: | |
| print("ERROR: 'openai' package is required. Install with: pip install openai") | |
| sys.exit(1) | |
| from devops_sandbox import BashAction, DevopsSandboxEnv | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Base URL of the sandbox HTTP server hosting DevopsSandboxEnv.
ENV_URL = os.getenv("DEVOPS_SANDBOX_URL", "http://localhost:8000")
# OpenAI-compatible model name; override for local deployments (e.g. vLLM).
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
# Hard cap on agent turns per episode before giving up.
MAX_TURNS = int(os.getenv("MAX_TURNS", "30"))
# System prompt sent once at the start of the conversation.
# NOTE(review): the original text contained mojibake ("β") where arrow
# characters were intended in the success criteria; restored to "→" here so
# the LLM sees readable expectations.
SYSTEM_PROMPT = """\
You are an expert DevOps engineer and Node.js developer.
You have been dropped into a Linux container with a broken Express.js backend in /app.
Your goal is to diagnose and fix ALL bugs so the app runs correctly.
RULES:
1. Respond ONLY with a JSON object: {"command": "<bash command>"}
2. Use standard bash/Linux commands (ls, cat, grep, sed, node, npm, etc.)
3. Do NOT use interactive editors (vi, nano). Use sed or echo/cat with redirection.
4. After fixing bugs, restart the app with: cd /app && npm start &
5. Be methodical: read files first, understand the bug, then fix it.
EXPECTED FINAL STATE:
- App starts without errors on port 3000
- GET /health → 200
- GET /api/users → 200 with JSON containing "users" array
- GET /api/data → 200 with JSON containing "records" array
"""
def extract_command(llm_response: str) -> str:
    """Extract a bash command from the LLM's response.

    Accepted shapes, tried in order:
      1. The whole response is a JSON object with a "command" key.
      2. A fenced code block: ```json (JSON payload), ```bash/sh/shell
         (shell payload), or an untagged block (first line, or a JSON
         payload if the block starts with "{").
      3. Fallback: the raw response, with stray backticks stripped.

    Returns the extracted command string (possibly empty).
    """
    text = llm_response.strip()

    # 1. Whole response is a JSON object.
    try:
        data = json.loads(text)
        if isinstance(data, dict) and "command" in data:
            return data["command"]
    except (json.JSONDecodeError, TypeError):
        pass

    # 2. Fenced code blocks: odd-indexed segments of a ``` split.
    if "```" in llm_response:
        for block in llm_response.split("```")[1::2]:
            code = block.strip()
            # A language tag, if present, is the first line of the block.
            # Match the tag exactly: the old prefix test startswith("sh")
            # misfired on commands like "shutdown", dropping their first line.
            tag, _, rest = code.partition("\n")
            tag = tag.strip().lower()
            if tag == "json" or code.startswith("{"):
                # JSON payload, tagged or untagged (untagged JSON used to be
                # returned verbatim as a "command").
                payload = rest.strip() if tag == "json" else code
                try:
                    data = json.loads(payload)
                    if isinstance(data, dict) and "command" in data:
                        return data["command"]
                except (json.JSONDecodeError, TypeError):
                    pass
            elif tag in ("bash", "sh", "shell"):
                # Tagged shell block: everything after the tag line.
                body = rest.strip()
                if body:
                    return body
            else:
                # Untagged block: treat the first non-empty line as the command.
                first_line = code.split("\n")[0].strip()
                if first_line:
                    return first_line

    # 3. Fallback: strip stray backticks; give JSON one last chance.
    cmd = text.strip("`").strip()
    if cmd.startswith("{"):
        try:
            return json.loads(cmd)["command"]
        except Exception:
            pass
    return cmd
def main() -> None:
    """Run the baseline LLM agent loop against the DevOps sandbox.

    Resets the environment, then alternates LLM completion -> extracted bash
    command -> environment step for up to MAX_TURNS turns, stopping early
    when the episode reports done.  The full trajectory (commands, output,
    grader feedback) is accumulated in ``messages`` so the model sees its
    history each turn.  Requires OPENAI_API_KEY (and optionally
    OPENAI_BASE_URL) in the environment.
    """
    print("=" * 60)
    # NOTE(review): banner/separator/checkmark glyphs below were mojibake
    # ("β") in the original; restored to the intended characters.
    print(" Self-Healing DevOps Sandbox — Baseline Agent")
    print("=" * 60)

    # OpenAI() picks up OPENAI_API_KEY / OPENAI_BASE_URL from the environment.
    client = OpenAI()
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    with DevopsSandboxEnv(base_url=ENV_URL).sync() as env:
        # Reset the environment to its initial broken state.
        print("\n[*] Resetting environment...")
        result = env.reset()
        obs = result.observation
        # obs.stdout may be empty/None; guard like the in-loop previews do.
        print(f"\n[INIT] Task prompt:\n{(obs.stdout or '')[:500]}...")
        print(f"[INIT] Score: {obs.grader_score} | Feedback: {obs.grader_feedback}")

        # Seed the conversation with the initial observation.
        messages.append({
            "role": "user",
            "content": (
                f"Here is the initial state of the broken app:\n\n"
                f"```\n{obs.stdout}\n```\n\n"
                f"Current directory: {obs.current_dir}\n"
                f"Score: {obs.grader_score}/1.0\n\n"
                f"What bash command should I run first?"
            ),
        })

        for turn in range(1, MAX_TURNS + 1):
            print(f"\n{'─' * 40}")
            print(f"Turn {turn}/{MAX_TURNS}")
            print(f"{'─' * 40}")

            # Ask the LLM for the next command.  An API failure ends the
            # run; there is no point continuing without a policy.
            try:
                response = client.chat.completions.create(
                    model=MODEL,
                    messages=messages,
                    temperature=0.2,
                    max_tokens=256,
                )
                llm_text = response.choices[0].message.content or ""
            except Exception as e:
                print(f"[ERROR] LLM call failed: {e}")
                break

            # Extract a bash command; fall back to a harmless listing so the
            # episode keeps producing observations.
            command = extract_command(llm_text)
            if not command:
                print("[WARN] Could not extract command from LLM response")
                command = "ls -la /app"
            print(f"[CMD] {command}")

            # Execute the command in the sandbox.
            result = env.step(BashAction(command=command))
            obs = result.observation
            stdout_preview = obs.stdout[:300] if obs.stdout else "(empty)"
            stderr_preview = obs.stderr[:200] if obs.stderr else "(none)"
            print(f"[OUT] {stdout_preview}")
            if obs.stderr:
                print(f"[ERR] {stderr_preview}")
            print(f"[SCORE] {obs.grader_score:.2f} | {obs.grader_feedback}")

            # Record the exchange so the model sees the full trajectory.
            messages.append({"role": "assistant", "content": llm_text})
            messages.append({
                "role": "user",
                "content": (
                    f"Command output:\n"
                    f"stdout:\n```\n{obs.stdout}\n```\n"
                    f"stderr:\n```\n{obs.stderr}\n```\n"
                    f"Current score: {obs.grader_score}/1.0\n"
                    f"Grader feedback: {obs.grader_feedback}\n\n"
                    f"What command should I run next?"
                ),
            })

            # Stop as soon as the environment declares the episode over.
            if result.done:
                print(f"\n{'=' * 60}")
                if obs.grader_score >= 1.0:
                    print(" ✓ ALL BUGS FIXED — PERFECT SCORE!")
                else:
                    print(f" Episode ended. Final score: {obs.grader_score:.2f}/1.0")
                print(f"{'=' * 60}")
                break
        else:
            # for/else: reached only when the loop exhausted MAX_TURNS
            # without breaking.
            print(f"\n[!] Max turns ({MAX_TURNS}) reached.")
            print(f"    Final score: {obs.grader_score:.2f}/1.0")

    print("\n[*] Done.")


if __name__ == "__main__":
    main()