opencode-env-rollout / server /sandbox_smoke.py
AdithyaSK's picture
AdithyaSK HF Staff
Upload folder using huggingface_hub
d4d3fde verified
"""Stand-alone E2B sandbox smoke — boot opencode serve, expose it publicly.
This script isolates "can a sandbox even stand up opencode serve?" from
the rest of the env (no MCP server, no proxy, no primitive, no UI). Good
for when a full rollout fails and you want to rule out the sandbox path.
What it does:
1. Create a fresh E2B sandbox.
2. Write ``~/.config/opencode/opencode.json`` pointing at either:
- the HF Router (default, just needs HF_TOKEN), or
- a user-provided vLLM URL.
3. Install opencode via the upstream one-liner.
4. Start ``opencode serve --port 4096 --hostname 0.0.0.0`` in bg.
5. ``sandbox.get_host(4096)`` → a public ``https://4096-<sbx>.e2b.app``.
6. Poll ``{public}/doc`` until it answers 200.
7. Print the public URL + ``sandbox_id`` and keep the sandbox alive so
you can hit it manually. Ctrl-C closes the sandbox.
Usage:
# HF Router (default)
HF_TOKEN=hf_... uv run python server/sandbox_smoke.py
# or self-hosted vLLM
uv run python server/sandbox_smoke.py \\
--backend vllm \\
--vllm-url https://my-tunnel.example/v1 \\
--model Qwen/Qwen3.5-4B
Once it prints the URL you can:
curl https://4096-<sbx>.e2b.app/global/health
curl https://4096-<sbx>.e2b.app/config
# create + send prompt
SID=$(curl -s -X POST https://4096-<sbx>.e2b.app/session \\
-H 'content-type: application/json' \\
-d '{"title":"smoke"}' | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])')
curl -X POST https://4096-<sbx>.e2b.app/session/$SID/prompt_async \\
-H 'content-type: application/json' \\
-d '{"parts":[{"type":"text","text":"write hello.py"}]}'
curl -N https://4096-<sbx>.e2b.app/event
"""
from __future__ import annotations
import argparse
import json
import os
import signal
import sys
import time
from pathlib import Path
from typing import Any
# Load the env-server's .env (E2B_API_KEY, HF_TOKEN, etc.) before importing
# anything that needs them. Exactly one location is checked: a ``.env`` file
# in the parent of the directory containing this script (no further walking
# up). python-dotenv is optional: when it is not installed the step is
# skipped and the process environment is used as-is.
try:
    from dotenv import load_dotenv

    _env_path = Path(__file__).resolve().parent.parent / ".env"
    if _env_path.is_file():
        # override=False: presumably variables already exported in the shell
        # take precedence over the file (python-dotenv semantics; confirm).
        load_dotenv(_env_path, override=False)
        print(f"loaded env from {_env_path}")
except ImportError:
    pass

# Prefer the Sandbox class from ``e2b_code_interpreter``; fall back to the
# one in the base ``e2b`` package when the former is not installed.
try:
    from e2b_code_interpreter import Sandbox
except ImportError:
    from e2b import Sandbox  # type: ignore
# Port ``opencode serve`` is started on inside the sandbox; the same port is
# passed to ``sandbox.get_host`` to derive the public URL.
SERVE_PORT = 4096
# Sandbox-side directory and file the generated opencode config is written to.
CONFIG_DIR = "/home/user/.config/opencode"
CONFIG_PATH = f"{CONFIG_DIR}/opencode.json"
# Sandbox-side log directory; stdout/stderr of ``opencode serve`` is
# redirected to SERVE_LOG and read back when a probe fails.
LOG_DIR = "/home/user/logs/agent"
SERVE_LOG = f"{LOG_DIR}/serve.log"
def build_opencode_json(
    *,
    backend: str,
    model_id: str,
    base_url: str,
    api_key: str,
    context_limit: int = 32768,
    output_limit: int = 16384,
) -> str:
    """Render the ``opencode.json`` text for a single OpenAI-compatible provider.

    Args:
        backend: ``"vllm"`` selects the provider id ``vllm``; any other value
            selects ``hf-router``.
        model_id: Model name, used both as the key under ``models`` and in the
            top-level ``model`` field (``<provider>/<model_id>``).
        base_url: Value stored as the provider's ``baseURL`` option.
        api_key: Value stored as the provider's ``apiKey`` option.
        context_limit: Stored as the model's ``limit.context``.
        output_limit: Stored as the model's ``limit.output``.

    Returns:
        The configuration serialized as JSON with a two-space indent.
    """
    if backend == "vllm":
        provider_id = "vllm"
    else:
        provider_id = "hf-router"

    model_entry = {
        "name": model_id,
        "limit": {"context": context_limit, "output": output_limit},
    }
    provider_entry = {
        "npm": "@ai-sdk/openai-compatible",
        "name": f"{provider_id} (smoke)",
        "options": {
            "baseURL": base_url,
            "apiKey": api_key,
            # Presumably milliseconds (i.e. 10 minutes) — confirm against
            # the opencode config schema.
            "timeout": 600_000,
        },
        "models": {model_id: model_entry},
    }
    document = {
        "$schema": "https://opencode.ai/config.json",
        "model": f"{provider_id}/{model_id}",
        "provider": {provider_id: provider_entry},
        # The webfetch and question tools are switched off for the smoke run.
        "tools": {"webfetch": False, "question": False},
    }
    return json.dumps(document, indent=2)
# Reference point for the elapsed-time prefix printed by log(). A monotonic
# clock is used so wall-clock adjustments cannot make the elapsed time jump
# or go negative during a long hold.
_START = time.monotonic()


def log(msg: str) -> None:
    """Print *msg* prefixed with the seconds elapsed since module import.

    The line is flushed immediately so progress appears in real time even
    when stdout is piped.
    """
    elapsed = time.monotonic() - _START
    print(f"[{elapsed:6.1f}s] {msg}", flush=True)
def run_shell(sbx: Any, cmd: str, *, timeout_s: int = 120) -> tuple[int, str, str]:
    """Run *cmd* in the sandbox and return ``(exit_code, stdout, stderr)``.

    Args:
        sbx: Sandbox object exposing ``commands.run(cmd, timeout=...)``.
        cmd: Shell command line to execute.
        timeout_s: Timeout forwarded to ``commands.run``.

    Returns:
        A tuple of the exit code and the captured stdout/stderr, with missing
        streams normalized to empty strings.

    Raises:
        Exception: Any error from ``commands.run`` that does not describe a
            finished command (i.e. carries no integer ``exit_code``) is
            re-raised unchanged.
    """
    try:
        result = sbx.commands.run(cmd, timeout=timeout_s)
    except Exception as exc:
        # Newer E2B SDKs raise (CommandExitException) instead of returning
        # when the command exits non-zero; the exception carries the same
        # exit_code/stdout/stderr fields. Duck-type on ``exit_code`` so
        # callers' ``rc != 0`` checks work, and let everything else
        # (timeouts, transport errors) propagate.
        exit_code = getattr(exc, "exit_code", None)
        if not isinstance(exit_code, int):
            raise
        return (
            exit_code,
            getattr(exc, "stdout", None) or "",
            getattr(exc, "stderr", None) or "",
        )
    return (result.exit_code, result.stdout or "", result.stderr or "")
def main() -> int:
    """Boot ``opencode serve`` in a fresh E2B sandbox and hold it open.

    Steps: parse arguments, create the sandbox, write the opencode config,
    install and verify opencode, start ``opencode serve`` in the background,
    wait for ``/doc`` on the public URL to answer 200, print manual probe
    recipes, then keep probing ``/doc`` until the hold expires, a probe
    fails, or SIGINT/SIGTERM arrives. The sandbox is always killed on exit.

    Returns:
        ``0`` after the hold phase ends, ``1`` when a sandbox-side step fails
        (mkdir, install, version check, readiness), ``2`` when required
        arguments or environment variables are missing.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--backend", choices=["hf", "vllm"], default="hf")
    ap.add_argument("--model", default="Qwen/Qwen3.5-397B-A17B:together")
    ap.add_argument("--vllm-url", default="")
    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
    ap.add_argument("--sandbox-timeout-s", type=int, default=900)
    ap.add_argument("--idle-hold-s", type=int, default=1200,
                    help="keep the sandbox alive for this many seconds after boot")
    args = ap.parse_args()

    if args.backend == "hf":
        if not args.hf_token:
            print("ERROR: --backend hf needs --hf-token or $HF_TOKEN", file=sys.stderr)
            return 2
        base_url = "https://router.huggingface.co/v1"
        api_key = args.hf_token
    else:
        if not args.vllm_url:
            print("ERROR: --backend vllm needs --vllm-url", file=sys.stderr)
            return 2
        # Normalize so the URL always ends in exactly one "/v1".
        base_url = args.vllm_url.rstrip("/")
        if not base_url.endswith("/v1"):
            base_url += "/v1"
        api_key = "anything"

    if not os.environ.get("E2B_API_KEY"):
        print("ERROR: E2B_API_KEY not set", file=sys.stderr)
        return 2

    # Imported once, and before the sandbox exists, so a missing dependency
    # fails fast instead of after the install step.
    import httpx

    ready_timeout_s = 60.0   # upper bound for the initial /doc wait
    ready_interval_s = 0.5   # pause between readiness polls
    probe_interval_s = 10.0  # pause between health probes during the hold
    lifetime_margin_s = 30   # slack kept between hold end and sandbox expiry

    log(f"[1/7] creating sandbox (timeout={args.sandbox_timeout_s}s) …")
    sbx = Sandbox.create(timeout=args.sandbox_timeout_s)
    created_at = time.monotonic()
    log(f" sandbox_id = {sbx.sandbox_id}")
    try:
        log("[2/7] mkdir config + logs …")
        rc, _out, err = run_shell(sbx, f"mkdir -p {CONFIG_DIR} {LOG_DIR}")
        if rc != 0:
            log(f" FAIL rc={rc} stderr={err[:500]}")
            return 1

        log(f"[3/7] writing {CONFIG_PATH} …")
        cfg = build_opencode_json(
            backend=args.backend,
            model_id=args.model,
            base_url=base_url,
            api_key=api_key,
        )
        sbx.files.write(CONFIG_PATH, cfg)
        log(f" backend={args.backend} model={args.model}")
        log(f" baseURL={base_url}")

        log("[4/7] installing opencode via curl opencode.ai/install … (~10-30s cold)")
        rc, out, err = run_shell(
            sbx,
            "curl -fsSL https://opencode.ai/install | bash 2>&1",
            timeout_s=300,
        )
        log(f" install rc={rc}")
        if out:
            for line in out.strip().splitlines()[-8:]:
                log(f" │ {line}")
        if rc != 0:
            log(" stderr tail:")
            for line in (err or "").strip().splitlines()[-10:]:
                log(f" │ {line}")
            return 1

        log("[5/7] verifying opencode binary …")
        rc, out, err = run_shell(sbx, '$HOME/.opencode/bin/opencode --version')
        log(f" opencode --version rc={rc} out={(out or '').strip()[:120]}")
        if rc != 0:
            log(f" stderr: {(err or '')[:400]}")
            return 1

        log(f"[6/7] starting opencode serve in bg on :{SERVE_PORT} …")
        serve_cmd = (
            'export PATH="$HOME/.opencode/bin:$PATH" && '
            f"opencode serve --port {SERVE_PORT} --hostname 0.0.0.0 "
            f"> {SERVE_LOG} 2>&1"
        )
        serve_bg = sbx.commands.run(serve_cmd, background=True)
        log(f" serve pid = {getattr(serve_bg, 'pid', '?')}")
        host = sbx.get_host(SERVE_PORT)
        public_url = f"https://{host}"
        log(f" public URL = {public_url}")

        log("[7/7] waiting for /doc to answer (polls every 0.5s for 60s) …")
        ok = False
        poll_started = time.monotonic()
        polls = 0
        # Deadline-based so the advertised 60s bound holds even when each
        # request burns its full 5s timeout; elapsed times are measured, not
        # derived from the poll count.
        while time.monotonic() - poll_started < ready_timeout_s:
            polls += 1
            try:
                r = httpx.get(f"{public_url}/doc", timeout=5)
            except Exception as exc:
                # Connection errors are expected while the server boots.
                if polls % 6 == 0:
                    waited = time.monotonic() - poll_started
                    log(f" /doc unreachable ({type(exc).__name__}, {waited:.1f}s)")
            else:
                waited = time.monotonic() - poll_started
                if r.status_code == 200:
                    log(f" /doc ok (poll #{polls}, {waited:.1f}s)")
                    ok = True
                    break
                if polls % 6 == 0:  # progress line every sixth poll
                    log(f" /doc → HTTP {r.status_code} (still trying, {waited:.1f}s)")
            time.sleep(ready_interval_s)

        if not ok:
            log(" /doc never answered — tailing serve log (last 2KB):")
            try:
                tail = sbx.files.read(SERVE_LOG)[-2000:]
            except Exception as exc:
                tail = f"(could not read log: {exc})"
            for line in tail.splitlines()[-40:]:
                log(f" │ {line}")
            return 1

        # The sandbox is reaped by E2B once its timeout elapses. If the hold
        # would outlive it (true for the defaults: 1200s hold vs 900s
        # timeout), extend the timeout; otherwise the probe below would
        # report a bogus "serve appears dead" when the sandbox expires.
        hold_s = args.idle_hold_s
        lifetime_left_s = args.sandbox_timeout_s - (time.monotonic() - created_at)
        if hold_s + lifetime_margin_s > lifetime_left_s:
            try:
                sbx.set_timeout(hold_s + lifetime_margin_s)
                log(f" extended sandbox timeout to {hold_s + lifetime_margin_s}s to cover the hold")
            except Exception as exc:
                # Could not extend: shorten the hold so we stop cleanly
                # before the sandbox disappears.
                hold_s = max(0, int(lifetime_left_s) - lifetime_margin_s)
                log(f" could not extend sandbox timeout ({type(exc).__name__}: {exc}); "
                    f"holding for {hold_s}s instead")

        print("\n" + "=" * 70)
        print("sandbox is up — manual probe recipes:")
        print("=" * 70)
        print(f"curl -s {public_url}/global/health | jq .")
        print(f"curl -s {public_url}/config | jq '.model, .provider'")
        print()
        print(f"SID=$(curl -s -X POST {public_url}/session \\")
        print(" -H 'content-type: application/json' \\")
        print(" -d '{\"title\":\"smoke\"}' | jq -r .id)")
        print(f"curl -X POST {public_url}/session/$SID/prompt_async \\")
        print(" -H 'content-type: application/json' \\")
        print(" -d '{\"parts\":[{\"type\":\"text\",\"text\":\"write hello.py and run it\"}]}'")
        print(f"curl -N {public_url}/event # SSE stream")
        print()
        print(f"serve log: sbx.files.read('{SERVE_LOG}')")
        print(f"sandbox_id: {sbx.sandbox_id}")
        print(f"holding for up to {hold_s}s — Ctrl-C to close")
        print("=" * 70 + "\n")

        # The handler only flips a flag; the loops below poll it, so the
        # sandbox is closed through the normal ``finally`` path.
        stopper = {"stop": False}

        def _sigh(*_a):
            print("\nsignal — closing sandbox")
            stopper["stop"] = True

        signal.signal(signal.SIGINT, _sigh)
        signal.signal(signal.SIGTERM, _sigh)

        def _dump_serve_log() -> None:
            """Best-effort diagnostics: tail serve.log and list the workdir."""
            try:
                tail = sbx.files.read(SERVE_LOG)
                log(" --- serve.log tail (last 4KB) ---")
                for line in tail[-4000:].splitlines()[-60:]:
                    log(f" │ {line}")
                log(" --- end serve.log ---")
            except Exception as exc2:
                log(f" could not read serve.log: {exc2}")
            # Also list workdir so we can see if the agent did anything.
            try:
                _rc, ls_out, ls_err = run_shell(sbx, "ls -la /home/user/workdir 2>&1 | head -40")
                log(" --- workdir ls ---")
                for line in (ls_out or ls_err).splitlines():
                    log(f" │ {line}")
            except Exception as exc3:
                log(f" could not list workdir: {exc3}")

        # Periodic /doc ping so we catch opencode-serve crashes in real time.
        # Any non-200 (incl. E2B's 502 "port not open") is a crash signal:
        # dump serve.log and stop the hold.
        last_ok_ts = time.monotonic()
        deadline = last_ok_ts + hold_s
        while time.monotonic() < deadline and not stopper["stop"]:
            try:
                r = httpx.get(f"{public_url}/doc", timeout=5)
            except Exception as exc:
                log(f"!!! /doc probe failed: {type(exc).__name__}: {exc} "
                    f"(last ok {time.monotonic()-last_ok_ts:.1f}s ago)")
                _dump_serve_log()
                break
            if r.status_code != 200:
                log(f"!!! /doc → HTTP {r.status_code} "
                    f"(last ok {time.monotonic()-last_ok_ts:.1f}s ago) — "
                    f"opencode serve appears dead, dumping log")
                _dump_serve_log()
                break
            last_ok_ts = time.monotonic()
            # Sleep in short slices: a single 10s sleep resumes after the
            # signal handler returns, delaying shutdown by up to 10s.
            next_probe = min(deadline, time.monotonic() + probe_interval_s)
            while time.monotonic() < next_probe and not stopper["stop"]:
                time.sleep(0.25)
        return 0
    finally:
        try:
            print("killing sandbox …")
            sbx.kill()
        except Exception as exc:
            print(f" kill failed (probably already dead): {exc}")
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())