opencode-env / client.py
AdithyaSK's picture
AdithyaSK HF Staff
Upload folder using huggingface_hub
70f2179 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Client for the deployed opencode_env server.

The server exposes a single MCP tool ``run_rollout`` that runs one OpenCode
rollout in an E2B sandbox and returns a JSON-serialized :class:`RolloutResult`.

Example::

    import os

    from opencode_env import OpenCodeEnv

    with OpenCodeEnv(base_url="https://adithya-sk-opencode-env.hf.space") as env:
        env.reset()
        result = env.run_rollout(
            base_url="https://api.openai.com/v1",
            api_key=os.environ["OPENAI_API_KEY"],
            model="gpt-4o-mini",
            instruction="Create binary_search.py exposing def binary_search(arr, target) -> int...",
            setup=[],
            verify=["python /home/user/test.py"],
            task_id="binary_search_v1",
        )
        print(result.reward, len(result.proxy_turns))
"""
from __future__ import annotations
import json
from typing import Any
from openenv.core.mcp_client import MCPToolClient
try:
from .models import RolloutResult
except ImportError: # pragma: no cover
from models import RolloutResult # type: ignore
class OpenCodeEnv(MCPToolClient):
    """Typed wrapper around the opencode_env MCP server.

    ``reset``, ``call_tool``, ``list_tools``, ``from_docker_image`` and the
    context-manager semantics are inherited from :class:`MCPToolClient`; this
    class adds the typed :meth:`run_rollout` convenience method on top.
    """

    def run_rollout(
        self,
        *,
        # Endpoint — pass either the shorthand selector OR explicit fields.
        endpoint: str = "",  # "vllm" | "openai" | "hf_router"
        base_url: str = "",
        api_key: str = "",
        model: str = "",
        # Task — the "list of bash commands" shape
        instruction: str,
        setup: list[str] | None = None,
        verify: list[str] | None = None,
        # Bookkeeping / tunables
        task_id: str = "",
        mode: str = "transparent_proxy",
        disable_thinking: bool | None = None,
        max_tokens_cap: int = 4096,
        top_logprobs: int = 5,
        agent_timeout_s: float = 600.0,
        template: str = "",
    ) -> RolloutResult:
        """Execute a single OpenCode rollout and parse the typed result.

        All arguments are keyword-only and are forwarded unchanged to the
        server-side ``run_rollout`` MCP tool.

        Args:
            endpoint: Shorthand endpoint selector (``"vllm"``, ``"openai"``
                or ``"hf_router"``). Pass either this or the explicit
                ``base_url`` / ``api_key`` / ``model`` fields.
            base_url: OpenAI-compatible LLM endpoint (with trailing /v1).
            api_key: Bearer token for the LLM. Use ``"intercepted"`` for vLLM
                if it doesn't enforce auth.
            model: Model id understood by the LLM endpoint
                (e.g. ``"gpt-4o-mini"``, ``"Qwen/Qwen3.5-4B"``,
                ``"Qwen/Qwen3-4B-Instruct-2507:nscale"``).
            instruction: Prompt passed to ``opencode run``.
            setup: Bash commands run sequentially **before** the agent
                starts. Each command runs in the sandbox; non-zero exit
                aborts setup. ``None`` is sent as an empty list.
            verify: Bash commands run sequentially **after** the agent
                exits. Reward = ``passed_count / total`` unless any command
                writes a float to ``/home/user/logs/verifier/reward.txt``
                (override). ``None`` is sent as an empty list.
            task_id: Echoed back in the result for traceability.
            mode: ``"transparent_proxy"`` (captures per-token logprobs via
                an in-sandbox FastAPI proxy) or ``"black_box"`` (no proxy).
            disable_thinking: Inject
                ``chat_template_kwargs.enable_thinking=false`` on forwarded
                requests. Needed for Qwen3.5 vLLM; harmless on Instruct
                variants; rejected by OpenAI direct.
            max_tokens_cap: Clamp on per-turn ``max_tokens``. OpenCode asks
                for ~32k by default; gpt-4o-mini caps at 16k.
            top_logprobs: Top-k logprobs requested upstream. HF Router caps
                at 5; OpenAI accepts up to 20; vLLM is unbounded.
            agent_timeout_s: Hard wall-clock budget for one ``opencode run``.
            template: E2B template name (e.g. ``"opencode-rl"``). Empty
                string uses the default (slow) base image.

        Returns:
            A :class:`RolloutResult` with reward, per-turn logprobs, file
            outputs, setup/verify results, and diagnostic tails.
        """
        # Always send real lists: a missing or empty command list becomes a
        # fresh [], and a caller-supplied sequence is copied.
        setup_commands = list(setup) if setup else []
        verify_commands = list(verify) if verify else []

        tool_arguments: dict[str, Any] = {
            "endpoint": endpoint,
            "base_url": base_url,
            "api_key": api_key,
            "model": model,
            "instruction": instruction,
            "setup": setup_commands,
            "verify": verify_commands,
            "task_id": task_id,
            "mode": mode,
            "disable_thinking": disable_thinking,
            "max_tokens_cap": max_tokens_cap,
            "top_logprobs": top_logprobs,
            "agent_timeout_s": agent_timeout_s,
            "template": template,
        }
        response = self.call_tool("run_rollout", **tool_arguments)

        # The MCP layer may wrap the JSON payload in several shapes; reduce it
        # to plain JSON text before validating it into the typed model.
        json_text = _extract_text(response)
        return RolloutResult.model_validate_json(json_text)
def _extract_text(result: Any) -> str:
"""Pull the JSON text out of whatever shape the MCP layer returns.
Handles the three shapes :meth:`MCPToolClient.call_tool` may surface:
a raw string, a ``CallToolObservation``-like object with
``.result.content[0].text``, or a dict with ``content[0]["text"]``.
"""
if isinstance(result, str):
return result
inner = getattr(result, "result", None)
if inner is not None:
content = getattr(inner, "content", None)
if content:
first = content[0]
text = getattr(first, "text", None)
if isinstance(text, str):
return text
if isinstance(first, dict) and "text" in first:
return first["text"]
if isinstance(result, dict):
content = result.get("content")
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict) and "text" in first:
return first["text"]
nested = result.get("result")
if isinstance(nested, dict):
content = nested.get("content")
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict) and "text" in first:
return first["text"]
return json.dumps(result, default=str)
content = getattr(result, "content", None)
if content:
first = content[0]
text = getattr(first, "text", None)
if isinstance(text, str):
return text
return str(result)