File size: 2,305 Bytes
7698d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e98d70
 
 
 
 
 
7698d12
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""Pydantic models for the OpenCode OpenEnv environment."""

from __future__ import annotations

from typing import Any

from openenv.core.env_server.types import State
from pydantic import BaseModel, Field


class RolloutTurn(BaseModel):
    """One intercepted LLM turn captured by the in-sandbox proxy (Mode B).

    Only ``turn`` is required; every other field has a default (an empty
    container, ``None`` or ``0.0``), so a partially populated turn still
    validates.
    """

    # Index of this turn within a rollout.
    # NOTE(review): whether numbering starts at 0 or 1 is not visible here.
    turn: int

    # Free-form request/response payloads. Presumably the JSON bodies the
    # proxy saw on the wire — confirm against the proxy implementation.
    request: dict[str, Any] = Field(default_factory=dict)
    response: dict[str, Any] = Field(default_factory=dict)

    # Token-level data for the completion. Presumably three parallel lists
    # (one entry per generated token) — nothing in this model enforces that
    # their lengths match, so consumers should check before zipping them.
    completion_tokens: list[str] = Field(default_factory=list)
    completion_token_ids: list[int] = Field(default_factory=list)
    per_token_logps: list[float] = Field(default_factory=list)

    # ``None`` when no finish reason was recorded.
    finish_reason: str | None = None

    # NOTE(review): ``latency_s`` looks like seconds (``_s`` suffix) and
    # ``timestamp`` like epoch seconds — confirm with the producer. Both
    # default to 0.0, which is indistinguishable from "not measured".
    latency_s: float = 0.0
    timestamp: float = 0.0


class RolloutResult(BaseModel):
    """Outcome of one call to the ``run_rollout`` tool.

    Serialized to JSON as the tool result. The training-side client
    deserializes and feeds ``proxy_turns`` + ``reward`` into GRPO.

    Every field has a default, so ``RolloutResult()`` is a valid (empty)
    result; optional values use ``None`` to mean "not available".
    """

    # Identifiers (empty string when not set)
    task_id: str = ""
    sandbox_id: str = ""

    # Scalars
    # ``reward`` is ``None`` by default rather than 0.0, so "no reward" and
    # "zero reward" stay distinguishable.
    reward: float | None = None
    # NOTE(review): presumably the agent process exit code — confirm; the
    # verifier's exit code is tracked separately in ``test_exit_code``.
    exit_code: int = 0
    # NOTE(review): presumably wall-clock duration in seconds — confirm.
    wall_s: float = 0.0
    # Plain ``str``, so the value is not validated against a fixed set. The
    # comment below refers to a ``black_box`` mode besides this default.
    mode: str = "transparent_proxy"

    # Per-turn trajectory (empty in black_box mode)
    proxy_turns: list[RolloutTurn] = Field(default_factory=list)

    # Agent artifacts
    # NOTE(review): ``workdir_files`` presumably maps file path -> text
    # content — confirm against the code that populates it.
    workdir_files: dict[str, str] = Field(default_factory=dict)
    agent_log_tail: str = ""

    # Verifier bookkeeping
    verifier_stdout: str = ""
    verifier_stderr: str = ""
    # ``None`` by default; presumably stays ``None`` when the verifier did
    # not run — confirm.
    test_exit_code: int | None = None

    # Errors (if any) surfacing from sandbox/proxy/verifier path
    error: str | None = None

    # Diagnostic tails — populated when the primitive or verifier misbehaves so
    # the client can see WHAT happened inside the sandbox without a second
    # round-trip. Each is truncated to a few KB.
    # (Truncation is done by the producer; this model imposes no length limit.)
    proxy_log_tail: str = ""
    install_log_tail: str = ""


class OpenCodeState(State):
    """Persistent env state across calls to the single environment instance.

    Each HTTP session gets its own OpenCodeEnvironment (via
    ``SUPPORTS_CONCURRENT_SESSIONS = True`` on the server class), so this
    state is per-session.

    Extends the OpenEnv ``State`` base class; any fields inherited from it
    are not visible in this module.
    """

    # Counter starting at 0.
    # NOTE(review): presumably incremented once per finished rollout —
    # confirm against the environment implementation.
    rollouts_completed: int = 0

    # Summary of the most recent rollout; all ``None`` until set.
    # NOTE(review): presumably copied from the latest ``RolloutResult``
    # (``reward``, ``task_id``, ``sandbox_id``) — confirm against the caller.
    last_reward: float | None = None
    last_task_id: str | None = None
    last_sandbox_id: str | None = None