Spaces:

AdithyaSK
/

opencode-env-rollout

Sleeping

App Files Files Community

opencode-env-rollout / server /sandbox_smoke.py

AdithyaSK HF Staff

Upload folder using huggingface_hub

d4d3fde verified about 1 month ago

raw

history blame contribute delete

12.1 kB

	"""Stand-alone E2B sandbox smoke — boot opencode serve, expose it publicly.

	This script isolates "can a sandbox even stand up opencode serve?" from
	the rest of the env (no MCP server, no proxy, no primitive, no UI). Good
	for when a full rollout fails and you want to rule out the sandbox path.

	What it does:
	1. Create a fresh E2B sandbox.
	2. Write ``~/.config/opencode/opencode.json`` pointing at either:
	- the HF Router (default, just needs HF_TOKEN), or
	- a user-provided vLLM URL.
	3. Install opencode via the upstream one-liner.
	4. Start ``opencode serve --port 4096 --hostname 0.0.0.0`` in bg.
	5. ``sandbox.get_host(4096)`` → a public ``https://4096-<sbx>.e2b.app``.
	6. Poll ``{public}/doc`` until it answers 200.
	7. Print the public URL + ``sandbox_id`` and keep the sandbox alive so
	you can hit it manually. Ctrl-C closes the sandbox.

	Usage:
	# HF Router (default)
	HF_TOKEN=hf_... uv run python server/sandbox_smoke.py

	# or self-hosted vLLM
	uv run python server/sandbox_smoke.py \\
	--backend vllm \\
	--vllm-url https://my-tunnel.example/v1 \\
	--model Qwen/Qwen3.5-4B

	Once it prints the URL you can:

	curl https://4096-<sbx>.e2b.app/global/health
	curl https://4096-<sbx>.e2b.app/config
	# create + send prompt
	SID=$(curl -s -X POST https://4096-<sbx>.e2b.app/session \\
	-H 'content-type: application/json' \\
	-d '{"title":"smoke"}' | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])')
	curl -X POST https://4096-<sbx>.e2b.app/session/$SID/prompt_async \\
	-H 'content-type: application/json' \\
	-d '{"parts":[{"type":"text","text":"write hello.py"}]}'
	curl -N https://4096-<sbx>.e2b.app/event
	"""

	from __future__ import annotations

	import argparse
	import json
	import os
	import signal
	import sys
	import time
	from pathlib import Path
	from typing import Any

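	# Load the env-server's .env (E2B_API_KEY, HF_TOKEN, etc.) before importing
	# anything that needs them. Exactly one location is checked: the ``.env``
	# file in the parent of this file's directory. Variables that are already
	# set in the process environment are left untouched.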
	# python-dotenv is optional: when it is not installed the script relies on
	# whatever is already present in the process environment.
	try:
	from dotenv import load_dotenv

	# Single candidate path: <directory of this file>/../.env (no upward search).
	_env_path = Path(__file__).resolve().parent.parent / ".env"
	if _env_path.is_file():
	# override=False: values already exported in the environment take precedence.
	load_dotenv(_env_path, override=False)
	print(f"loaded env from {_env_path}")
	except ImportError:
	pass

	# Prefer the Sandbox class from e2b_code_interpreter; fall back to the one
	# exported by the base e2b package when the former is not installed.
	try:
	from e2b_code_interpreter import Sandbox
	except ImportError:
	from e2b import Sandbox # type: ignore


	# Port that ``opencode serve`` binds inside the sandbox and that is exposed
	# through ``sandbox.get_host``.
	SERVE_PORT = 4096

	# In-sandbox locations of the opencode config file and the serve log.
	CONFIG_DIR = "/home/user/.config/opencode"
	CONFIG_PATH = CONFIG_DIR + "/opencode.json"
	LOG_DIR = "/home/user/logs/agent"
	SERVE_LOG = LOG_DIR + "/serve.log"


	def build_opencode_json(
	    *,
	    backend: str,
	    model_id: str,
	    base_url: str,
	    api_key: str,
	    context_limit: int = 32768,
	    output_limit: int = 16384,
	) -> str:
	    """Render an ``opencode.json`` document declaring one provider and one model.

	    Args:
	        backend: ``"vllm"`` selects the provider id ``"vllm"``; any other
	            value selects ``"hf-router"``.
	        model_id: Model name, used both as the key under ``models`` and as
	            the suffix of the top-level ``model`` field.
	        base_url: Value written to the provider's ``options.baseURL``.
	        api_key: Value written to the provider's ``options.apiKey``.
	        context_limit: Value written to the model's ``limit.context``.
	        output_limit: Value written to the model's ``limit.output``.

	    Returns:
	        The configuration serialised as JSON text with a two-space indent.
	    """
	    if backend == "vllm":
	        provider_id = "vllm"
	    else:
	        provider_id = "hf-router"

	    # Assemble the document inside-out; key insertion order is kept by
	    # json.dumps, so it mirrors the layout of the emitted file.
	    connection = {
	        "baseURL": base_url,
	        "apiKey": api_key,
	        "timeout": 600_000,
	    }
	    model_entry = {
	        "name": model_id,
	        "limit": {"context": context_limit, "output": output_limit},
	    }
	    provider_entry = {
	        "npm": "@ai-sdk/openai-compatible",
	        "name": f"{provider_id} (smoke)",
	        "options": connection,
	        "models": {model_id: model_entry},
	    }
	    document = {
	        "$schema": "https://opencode.ai/config.json",
	        "model": f"{provider_id}/{model_id}",
	        "provider": {provider_id: provider_entry},
	        "tools": {"webfetch": False, "question": False},
	    }
	    return json.dumps(document, indent=2)


	# Wall-clock reference taken when the module is loaded; log() reports
	# seconds elapsed since this instant.
	_START = time.time()


	def log(msg: str) -> None:
	    """Print *msg* prefixed with the seconds elapsed since module load.

	    The prefix is ``[<elapsed>s]`` with the number right-aligned to six
	    characters and one decimal place. Output is flushed immediately so
	    progress is visible as it happens.
	    """
	    elapsed = time.time() - _START
	    stamp = f"[{elapsed:6.1f}s]"
	    print(stamp, msg, flush=True)


	def run_shell(sbx: Any, cmd: str, *, timeout_s: int = 120) -> tuple[int, str, str]:
	    """Run a shell command in the sandbox and return ``(exit_code, stdout, stderr)``.

	    Args:
	        sbx: Sandbox object exposing ``commands.run(cmd, timeout=...)``.
	        cmd: Shell command line to execute.
	        timeout_s: Timeout forwarded to ``commands.run``.

	    Returns:
	        A tuple of the exit code and the captured stdout / stderr, with
	        missing streams normalised to empty strings.

	    Raises:
	        Exception: Any error from ``commands.run`` that does not carry an
	            integer ``exit_code`` (timeouts, transport failures, ...) is
	            re-raised unchanged.
	    """
	    try:
	        result = sbx.commands.run(cmd, timeout=timeout_s)
	    except Exception as exc:
	        # E2B signals a non-zero exit by raising an exception that carries
	        # the command result (CommandExitException in current SDK releases
	        # - NOTE(review): confirm against the pinned SDK version). Callers
	        # branch on the returned exit code, so translate it back into the
	        # tuple; it is matched by shape so no SDK import is needed here.
	        exit_code = getattr(exc, "exit_code", None)
	        if not isinstance(exit_code, int):
	            raise
	        return (
	            exit_code,
	            getattr(exc, "stdout", None) or "",
	            getattr(exc, "stderr", None) or "",
	        )
	    return (result.exit_code, result.stdout or "", result.stderr or "")


	_BOOT_WAIT_S = 60.0  # total budget for the first 200 from /doc after starting serve
	_BOOT_POLL_S = 0.5  # pause between boot-time /doc polls
	_HOLD_PROBE_S = 10.0  # pause between liveness probes while holding the sandbox
	_HOLD_SLICE_S = 0.25  # sleep granularity during the hold so a signal is honoured quickly


	def _log_tail(text: str, *, max_lines: int | None = None) -> None:
	    """Log the last *max_lines* lines of *text* (all lines when ``None``)."""
	    lines = text.splitlines()
	    if max_lines is not None:
	        lines = lines[-max_lines:]
	    for line in lines:
	        log(f" │ {line}")


	def _dump_serve_log(sbx: Any) -> None:
	    """Best-effort diagnostics: tail of the serve log plus a workdir listing."""
	    try:
	        tail = sbx.files.read(SERVE_LOG)
	        log(" --- serve.log tail (last 4KB) ---")
	        _log_tail(tail[-4000:], max_lines=60)
	        log(" --- end serve.log ---")
	    except Exception as exc:
	        log(f" could not read serve.log: {exc}")
	    # Also list workdir so we can see if the agent did anything.
	    try:
	        _rc, out, err = run_shell(sbx, "ls -la /home/user/workdir 2>&1 | head -40")
	        log(" --- workdir ls ---")
	        _log_tail(out or err)
	    except Exception as exc:
	        # Diagnostics must never mask the original failure; report and move on.
	        log(f" could not list workdir: {exc}")


	def _wait_for_doc(http: Any, public_url: str) -> bool:
	    """Poll ``{public_url}/doc`` until it answers 200 or the boot budget runs out."""
	    started = time.monotonic()
	    deadline = started + _BOOT_WAIT_S
	    attempt = 0
	    while True:
	        attempt += 1
	        status = None
	        failure = None
	        try:
	            status = http.get(f"{public_url}/doc", timeout=5).status_code
	        except Exception as exc:  # any transport error just means "not up yet"
	            failure = type(exc).__name__
	        # Measured on the clock rather than derived from the attempt count,
	        # because a slow or timing-out request takes far longer than the pause.
	        elapsed = time.monotonic() - started
	        if status == 200:
	            log(f" /doc ok (poll #{attempt}, {elapsed:.1f}s)")
	            return True
	        if attempt % 6 == 0:  # progress line on every sixth poll
	            if failure is not None:
	                log(f" /doc unreachable ({failure}, {elapsed:.1f}s)")
	            else:
	                log(f" /doc → HTTP {status} (still trying, {elapsed:.1f}s)")
	        if time.monotonic() >= deadline:
	            return False
	        time.sleep(_BOOT_POLL_S)


	def _print_probe_recipes(public_url: str, sandbox_id: str, hold_s: int) -> None:
	    """Print copy-pasteable curl recipes for poking the running server."""
	    print("\n" + "=" * 70)
	    print("sandbox is up — manual probe recipes:")
	    print("=" * 70)
	    print(f"curl -s {public_url}/global/health | jq .")
	    print(f"curl -s {public_url}/config | jq '.model, .provider'")
	    print()
	    print(f"SID=$(curl -s -X POST {public_url}/session \\")
	    print(" -H 'content-type: application/json' \\")
	    print(" -d '{\"title\":\"smoke\"}' | jq -r .id)")
	    print(f"curl -X POST {public_url}/session/$SID/prompt_async \\")
	    print(" -H 'content-type: application/json' \\")
	    print(" -d '{\"parts\":[{\"type\":\"text\",\"text\":\"write hello.py and run it\"}]}'")
	    print(f"curl -N {public_url}/event # SSE stream")
	    print()
	    print(f"serve log: sbx.files.read('{SERVE_LOG}')")
	    print(f"sandbox_id: {sandbox_id}")
	    print(f"holding for up to {hold_s}s — Ctrl-C to close")
	    print("=" * 70 + "\n")


	def _hold(http: Any, sbx: Any, public_url: str, hold_s: int, should_stop: Any) -> None:
	    """Keep the sandbox alive, probing ``/doc`` until timeout, stop request or failure.

	    Any non-200 answer or transport error is treated as a crash signal: the
	    serve log is dumped and the hold ends.
	    """
	    last_ok = time.monotonic()
	    deadline = last_ok + hold_s
	    while time.monotonic() < deadline and not should_stop():
	        try:
	            resp = http.get(f"{public_url}/doc", timeout=5)
	        except Exception as exc:
	            log(f"!!! /doc probe failed: {type(exc).__name__}: {exc} "
	                f"(last ok {time.monotonic() - last_ok:.1f}s ago)")
	            _dump_serve_log(sbx)
	            return
	        if resp.status_code != 200:
	            log(f"!!! /doc → HTTP {resp.status_code} "
	                f"(last ok {time.monotonic() - last_ok:.1f}s ago) — "
	                f"opencode serve appears dead, dumping log")
	            _dump_serve_log(sbx)
	            return
	        last_ok = time.monotonic()
	        # Sleep in short slices: time.sleep() resumes after a signal handler
	        # returns, so one long sleep would delay Ctrl-C by up to the whole
	        # probe interval.
	        wake_at = min(last_ok + _HOLD_PROBE_S, deadline)
	        while time.monotonic() < wake_at and not should_stop():
	            time.sleep(_HOLD_SLICE_S)


	def main(argv: list[str] | None = None) -> int:
	    """Boot ``opencode serve`` in a fresh sandbox, expose it, and hold it open.

	    Args:
	        argv: Command-line arguments without the program name. ``None``
	            (the default) makes argparse read ``sys.argv``.

	    Returns:
	        ``0`` when the server came up and the hold phase ended, ``1`` when a
	        step inside the sandbox failed, ``2`` for an unusable invocation
	        (missing token, URL or ``E2B_API_KEY``).
	    """
	    ap = argparse.ArgumentParser()
	    ap.add_argument("--backend", choices=["hf", "vllm"], default="hf")
	    ap.add_argument("--model", default="Qwen/Qwen3.5-397B-A17B:together")
	    ap.add_argument("--vllm-url", default="")
	    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
	    ap.add_argument("--sandbox-timeout-s", type=int, default=900)
	    ap.add_argument("--idle-hold-s", type=int, default=1200,
	                    help="keep the sandbox alive for this many seconds after boot")
	    args = ap.parse_args(argv)

	    if args.backend == "hf":
	        if not args.hf_token:
	            print("ERROR: --backend hf needs --hf-token or $HF_TOKEN", file=sys.stderr)
	            return 2
	        base_url = "https://router.huggingface.co/v1"
	        api_key = args.hf_token
	    else:
	        if not args.vllm_url:
	            print("ERROR: --backend vllm needs --vllm-url", file=sys.stderr)
	            return 2
	        # Normalise so the URL always ends in exactly one "/v1".
	        base_url = args.vllm_url.rstrip("/")
	        if not base_url.endswith("/v1"):
	            base_url += "/v1"
	        api_key = "anything"

	    if not os.environ.get("E2B_API_KEY"):
	        print("ERROR: E2B_API_KEY not set", file=sys.stderr)
	        return 2

	    # Imported lazily so argument validation works without it, but before
	    # the sandbox exists so a missing dependency fails fast.
	    import httpx

	    if args.idle_hold_s > args.sandbox_timeout_s:
	        log(f"WARNING: --idle-hold-s ({args.idle_hold_s}s) exceeds "
	            f"--sandbox-timeout-s ({args.sandbox_timeout_s}s); the sandbox is "
	            f"expected to expire before the hold ends")

	    log(f"[1/7] creating sandbox (timeout={args.sandbox_timeout_s}s) …")
	    sbx = Sandbox.create(timeout=args.sandbox_timeout_s)
	    log(f" sandbox_id = {sbx.sandbox_id}")

	    try:
	        log("[2/7] mkdir config + logs …")
	        rc, out, err = run_shell(sbx, f"mkdir -p {CONFIG_DIR} {LOG_DIR}")
	        if rc != 0:
	            log(f" FAIL rc={rc} stderr={err[:500]}")
	            return 1

	        log(f"[3/7] writing {CONFIG_PATH} …")
	        # NOTE(review): the API key is stored in the sandbox config and the
	        # server is reachable on a public URL; check whether /config echoes
	        # provider options before using a long-lived token here.
	        cfg = build_opencode_json(
	            backend=args.backend,
	            model_id=args.model,
	            base_url=base_url,
	            api_key=api_key,
	        )
	        sbx.files.write(CONFIG_PATH, cfg)
	        log(f" backend={args.backend} model={args.model}")
	        log(f" baseURL={base_url}")

	        log("[4/7] installing opencode via curl opencode.ai/install … (~10-30s cold)")
	        rc, out, err = run_shell(
	            sbx,
	            "curl -fsSL https://opencode.ai/install | bash 2>&1",
	            timeout_s=300,
	        )
	        log(f" install rc={rc}")
	        if out:
	            _log_tail(out.strip(), max_lines=8)
	        if rc != 0:
	            log(" stderr tail:")
	            _log_tail((err or "").strip(), max_lines=10)
	            return 1

	        log("[5/7] verifying opencode binary …")
	        rc, out, err = run_shell(sbx, '$HOME/.opencode/bin/opencode --version')
	        log(f" opencode --version rc={rc} out={(out or '').strip()[:120]}")
	        if rc != 0:
	            log(f" stderr: {(err or '')[:400]}")
	            return 1

	        log(f"[6/7] starting opencode serve in bg on :{SERVE_PORT} …")
	        serve_cmd = (
	            'export PATH="$HOME/.opencode/bin:$PATH" && '
	            f"opencode serve --port {SERVE_PORT} --hostname 0.0.0.0 "
	            f"> {SERVE_LOG} 2>&1"
	        )
	        serve_bg = sbx.commands.run(serve_cmd, background=True)
	        log(f" serve pid = {getattr(serve_bg, 'pid', '?')}")

	        host = sbx.get_host(SERVE_PORT)
	        public_url = f"https://{host}"
	        log(f" public URL = {public_url}")

	        log("[7/7] waiting for /doc to answer (polls every 0.5s for 60s) …")
	        if not _wait_for_doc(httpx, public_url):
	            log(" /doc never answered — tailing serve log (last 2KB):")
	            try:
	                tail = sbx.files.read(SERVE_LOG)[-2000:]
	            except Exception as exc:
	                tail = f"(could not read log: {exc})"
	            _log_tail(tail, max_lines=40)
	            return 1

	        _print_probe_recipes(public_url, sbx.sandbox_id, args.idle_hold_s)

	        stop_requested = False

	        def _on_signal(*_args: Any) -> None:
	            nonlocal stop_requested
	            print("\nsignal — closing sandbox")
	            stop_requested = True

	        signal.signal(signal.SIGINT, _on_signal)
	        signal.signal(signal.SIGTERM, _on_signal)

	        # Periodic /doc ping so we catch opencode-serve crashes in real time.
	        _hold(httpx, sbx, public_url, args.idle_hold_s, lambda: stop_requested)
	        return 0

	    finally:
	        # Always release the sandbox, whichever way the run ended.
	        try:
	            print("killing sandbox …")
	            sbx.kill()
	        except Exception as exc:
	            print(f" kill failed (probably already dead): {exc}")


	# Script entry point: the process exit status is whatever main() returns.
	if __name__ == "__main__":
	    raise SystemExit(main())