Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Pydantic models for the deployed opencode_env HTTP server. | |
| The server exposes a single MCP tool ``run_rollout`` that takes a Task | |
| (instruction + setup commands + verify commands) plus an LLM endpoint | |
| config, runs one OpenCode rollout end-to-end inside an E2B sandbox, and | |
| returns a :class:`RolloutResult` JSON. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any | |
| from openenv.core.env_server.types import State | |
| from pydantic import BaseModel, Field | |
| class RolloutTurn(BaseModel): | |
| """One intercepted LLM turn captured by the in-sandbox proxy (Mode B).""" | |
| turn: int | |
| finish_reason: str | None = None | |
| completion_tokens: list[str] = Field(default_factory=list) | |
| completion_token_ids: list[int] = Field(default_factory=list) | |
| per_token_logps: list[float] = Field(default_factory=list) | |
| latency_s: float = 0.0 | |
| timestamp: float = 0.0 | |
| upstream_status: int | None = None | |
| upstream_error: dict[str, Any] | None = None | |
| class CommandResult(BaseModel): | |
| """Outcome of one bash command in setup/verify.""" | |
| cmd: str | |
| exit_code: int | |
| stdout: str = "" | |
| stderr: str = "" | |
| duration_s: float = 0.0 | |
| class RolloutResult(BaseModel): | |
| """Full payload returned from one ``run_rollout`` invocation. | |
| The trainer (or any client) decodes this from the MCP tool result JSON | |
| and feeds ``proxy_turns`` + ``reward`` into GRPO. | |
| """ | |
| # Identifiers | |
| task_id: str = "" | |
| sandbox_id: str = "" | |
| # Scalars | |
| reward: float | None = None | |
| agent_exit_code: int | None = None | |
| wall_s: float = 0.0 | |
| mode: str = "transparent_proxy" | |
| # Per-step results | |
| setup_results: list[CommandResult] = Field(default_factory=list) | |
| verify_results: list[CommandResult] = Field(default_factory=list) | |
| # Per-turn LLM trajectory (empty in black_box mode) | |
| proxy_turns: list[RolloutTurn] = Field(default_factory=list) | |
| # Filesystem the agent produced (path -> contents, truncated) | |
| files: dict[str, str] = Field(default_factory=dict) | |
| files_extra: list[str] = Field(default_factory=list) | |
| # Diagnostic tails | |
| agent_log_tail: str = "" | |
| proxy_log_tail: str = "" | |
| # Error surfacing | |
| error: str | None = None | |
| class OpenCodeState(State): | |
| """Per-session env state across calls to one OpenCodeEnvironment instance. | |
| Each HTTP session gets its own env (``SUPPORTS_CONCURRENT_SESSIONS=True`` | |
| on the server class), so this state is per-session. | |
| """ | |
| rollouts_completed: int = 0 | |
| last_reward: float | None = None | |
| last_task_id: str | None = None | |
| last_sandbox_id: str | None = None | |