Spaces:
Running
Running
File size: 2,809 Bytes
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Pydantic models for the deployed opencode_env HTTP server.
The server exposes a single MCP tool ``run_rollout`` that takes a Task
(instruction + setup commands + verify commands) plus an LLM endpoint
config, runs one OpenCode rollout end-to-end inside an E2B sandbox, and
returns a :class:`RolloutResult` JSON.
"""
from __future__ import annotations
from typing import Any
from openenv.core.env_server.types import State
from pydantic import BaseModel, Field
class RolloutTurn(BaseModel):
    """A single LLM turn intercepted by the in-sandbox proxy (Mode B).

    Only ``turn`` has to be supplied; every other field carries a default,
    and each list field defaults to its own fresh empty list.
    """

    # --- Turn identity and how generation ended ---
    turn: int
    finish_reason: str | None = None

    # --- Per-token completion data ---
    # NOTE(review): presumably these three lists are index-aligned (token
    # text / token id / log-prob); nothing in this model enforces that, so
    # confirm against the proxy code that fills them in.
    completion_tokens: list[str] = Field(default_factory=list)
    completion_token_ids: list[int] = Field(default_factory=list)
    per_token_logps: list[float] = Field(default_factory=list)

    # --- Timing ---
    # The ``_s`` suffix suggests seconds; the reference point of
    # ``timestamp`` is not visible from here -- TODO confirm.
    latency_s: float = 0.0
    timestamp: float = 0.0

    # --- Upstream outcome (both stay ``None`` unless explicitly set) ---
    upstream_status: int | None = None
    upstream_error: dict[str, Any] | None = None
class CommandResult(BaseModel):
    """Result of running one bash command during setup or verify.

    ``cmd`` and ``exit_code`` must be supplied; the two output fields
    default to empty strings and ``duration_s`` defaults to ``0.0``.
    """

    # --- The command and its exit code (both required) ---
    cmd: str
    exit_code: int

    # --- Output streams ---
    stdout: str = ""
    stderr: str = ""

    # --- Timing (``_s`` suffix suggests seconds -- TODO confirm) ---
    duration_s: float = 0.0
class RolloutResult(BaseModel):
    """Complete payload produced by a single ``run_rollout`` invocation.

    A client (for example the trainer) decodes this from the MCP tool result
    JSON and passes ``proxy_turns`` together with ``reward`` into GRPO.
    Every field has a default, so the model can be built with no arguments.
    """

    # --- Identifiers ---
    task_id: str = ""
    sandbox_id: str = ""

    # --- Scalars ---
    reward: float | None = None
    agent_exit_code: int | None = None
    wall_s: float = 0.0
    mode: str = "transparent_proxy"

    # --- Per-step results ---
    setup_results: list[CommandResult] = Field(default_factory=list)
    verify_results: list[CommandResult] = Field(default_factory=list)

    # --- Per-turn LLM trajectory (empty in black_box mode) ---
    proxy_turns: list[RolloutTurn] = Field(default_factory=list)

    # --- Filesystem the agent produced (path -> contents, truncated) ---
    files: dict[str, str] = Field(default_factory=dict)
    # NOTE(review): the role of ``files_extra`` is not documented here;
    # confirm its meaning against the code that fills it in.
    files_extra: list[str] = Field(default_factory=list)

    # --- Diagnostic tails ---
    agent_log_tail: str = ""
    proxy_log_tail: str = ""

    # --- Error surfacing ---
    error: str | None = None
class OpenCodeState(State):
    """Session-scoped state kept across calls to one OpenCodeEnvironment.

    The server class declares ``SUPPORTS_CONCURRENT_SESSIONS=True``, so each
    HTTP session gets its own env instance and therefore its own copy of
    this state.
    """

    # --- Running counter (starts at zero) ---
    rollouts_completed: int = 0

    # --- "last_*" fields; each defaults to ``None`` ---
    last_reward: float | None = None
    last_task_id: str | None = None
    last_sandbox_id: str | None = None
|