File size: 6,708 Bytes
70f2179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Unit tests for OpenCodeSession / OpenCodeSessionFactory (no sandbox)."""

from __future__ import annotations

import pytest

from opencode_env.config import OpenCodeConfig
from opencode_env.harness import OpenCodeSession, OpenCodeSessionFactory
from opencode_env.sandbox.base import ExecResult
from opencode_env.task import OpenCodeTask


class _FakeBgJob:
    """Stand-in for a background job handle that records whether it was killed."""

    def __init__(self) -> None:
        self._killed = False
        self.pid = 123

    def wait(self, timeout: float | None = None) -> int:
        """Report exit code 0 right away; ``timeout`` is accepted but unused."""
        exit_code = 0
        return exit_code

    def kill(self) -> None:
        """Flag this job as killed."""
        self._killed = True


class _FakeSandbox:
    """Sandbox double that keeps everything in memory and logs each call.

    ``install_exit`` is the exit code reported for the opencode install
    command; ``setup_exit`` is reported for every other command except the
    ``echo ok`` probe.
    """

    def __init__(self, *, install_exit: int = 0, setup_exit: int = 0) -> None:
        self._install_exit = install_exit
        self._setup_exit = setup_exit
        self.sandbox_id = "fake-sbx"
        self.killed = False
        # (command, envs) pairs in call order for exec() and start_bg().
        self.exec_calls: list[tuple[str, dict | None]] = []
        self.bg_calls: list[tuple[str, dict | None]] = []
        # path -> content for everything handed to write_text().
        self.written: dict[str, str] = {}

    def exec(self, cmd, *, envs=None, cwd=None, timeout=60):
        """Log the command and return a canned ``ExecResult`` chosen by its text."""
        self.exec_calls.append((cmd, envs))
        if cmd.strip() == "echo ok":
            # Health probe: the factory issues ``echo ok`` up to 15 times
            # before doing anything else; this fake is "ready" on the first try.
            exit_code, stdout = 0, "ok\n"
        elif "opencode.ai/install" in cmd:
            exit_code, stdout = self._install_exit, "opencode 0.0.0\n"
        else:
            exit_code, stdout = self._setup_exit, ""
        return ExecResult(exit_code, stdout, "")

    def start_bg(self, cmd, *, envs=None, cwd=None):
        """Log the background launch and hand back a fresh fake job."""
        launch = (cmd, envs)
        self.bg_calls.append(launch)
        return _FakeBgJob()

    def write_text(self, path, content):
        """Store ``content`` under ``path`` in the in-memory file table."""
        self.written[path] = content

    def read_text(self, path):
        """Return the stored content for ``path``, or ``""`` if never written."""
        try:
            return self.written[path]
        except KeyError:
            return ""

    def exists(self, path):
        """Tell whether ``path`` has been written."""
        was_written = path in self.written
        return was_written

    def kill(self):
        """Mark the sandbox as killed."""
        self.killed = True


class _FakeBackend:
    """Backend double that always returns one pre-built sandbox and counts requests."""

    def __init__(self, sandbox: _FakeSandbox) -> None:
        self.create_calls = 0
        self._sandbox = sandbox

    def create(self, *, timeout_s=900, envs=None, metadata=None):
        """Bump the call counter and return the stored sandbox; arguments are unused."""
        self.create_calls = self.create_calls + 1
        return self._sandbox


def _config(**overrides) -> OpenCodeConfig:
    """Build an ``OpenCodeConfig`` from fake OpenAI defaults.

    Any keyword in ``overrides`` replaces or extends the default fields.
    """
    defaults = {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "api_key": "sk-fake",
        "model": "openai/gpt-5.3-codex",
    }
    return OpenCodeConfig(**{**defaults, **overrides})


def test_factory_bootstraps_and_starts_agent():
    """``create`` installs opencode, writes config and task, and launches the agent."""
    fake_sandbox = _FakeSandbox()
    fake_backend = _FakeBackend(fake_sandbox)
    factory = OpenCodeSessionFactory(config=_config(), sandbox_backend=fake_backend)

    session = factory.create(task="solve fizzbuzz")

    assert fake_backend.create_calls == 1

    executed = [cmd for cmd, _ in fake_sandbox.exec_calls]
    assert any("opencode.ai/install" in cmd for cmd in executed)

    files = fake_sandbox.written
    assert "/home/user/.config/opencode/opencode.json" in files
    assert files["/home/user/task/instruction.md"] == "solve fizzbuzz"

    assert len(fake_sandbox.bg_calls) == 1, "agent must be started in background"
    # The provider endpoint and key must reach the agent through its process env.
    agent_envs = fake_sandbox.bg_calls[0][1]
    assert agent_envs["OPENAI_BASE_URL"] == "https://api.openai.com/v1"
    assert agent_envs["OPENAI_API_KEY"] == "sk-fake"
    assert isinstance(session, OpenCodeSession)


def test_factory_runs_task_setup_shell():
    """The task's ``setup_shell`` command is executed exactly once, verbatim."""
    fake_sandbox = _FakeSandbox()
    backend = _FakeBackend(fake_sandbox)
    factory = OpenCodeSessionFactory(config=_config(), sandbox_backend=backend)
    task = OpenCodeTask(instruction="x", setup_shell="pip install pytest")

    factory.create(task=task)

    pip_commands = []
    for command, _envs in fake_sandbox.exec_calls:
        if "pip install" in command:
            pip_commands.append(command)
    assert pip_commands == ["pip install pytest"]


def test_factory_uploads_extra_files():
    """Entries in ``upload_files`` end up written to the sandbox unchanged."""
    fake_sandbox = _FakeSandbox()
    backend = _FakeBackend(fake_sandbox)
    factory = OpenCodeSessionFactory(config=_config(), sandbox_backend=backend)
    extra_files = {"/home/user/workdir/hello.py": "print('hi')"}
    task = OpenCodeTask(instruction="run it", upload_files=extra_files)

    factory.create(task=task)

    uploaded = fake_sandbox.written["/home/user/workdir/hello.py"]
    assert uploaded == "print('hi')"


def test_factory_kills_sandbox_on_install_failure():
    """A non-zero install exit raises ``RuntimeError`` and the sandbox is killed."""
    failing_sandbox = _FakeSandbox(install_exit=1)
    backend = _FakeBackend(failing_sandbox)
    factory = OpenCodeSessionFactory(config=_config(), sandbox_backend=backend)

    with pytest.raises(RuntimeError, match="install failed"):
        factory.create(task="x")

    assert failing_sandbox.killed


def test_factory_accepts_transparent_proxy_mode():
    """Constructing with ``mode="transparent_proxy"`` stores that mode on the factory."""
    backend = _FakeBackend(_FakeSandbox())
    factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=backend,
        mode="transparent_proxy",
    )
    stored_mode = factory._mode
    assert stored_mode == "transparent_proxy"


def test_factory_rejects_unknown_mode():
    """An unrecognised ``mode`` makes the constructor raise ``ValueError``."""
    expect_unknown_mode = pytest.raises(ValueError, match="Unknown mode")
    with expect_unknown_mode:
        OpenCodeSessionFactory(
            config=_config(),
            sandbox_backend=_FakeBackend(_FakeSandbox()),
            mode="bogus",  # type: ignore[arg-type]
        )


def test_session_initial_messages():
    """The initial transcript is a single user message holding the instruction."""
    task = OpenCodeTask(instruction="hi")
    session = OpenCodeSession(sandbox=_FakeSandbox(), config=_config(), task=task)

    messages = session.initial_messages()

    expected = [{"role": "user", "content": "hi"}]
    assert messages == expected


def test_session_verify_without_verifier_returns_none_reward():
    """With no verifier configured, ``verify`` yields no reward and ``done=True``."""
    task = OpenCodeTask(instruction="x")
    session = OpenCodeSession(sandbox=_FakeSandbox(), config=_config(), task=task)

    outcome = session.verify(transcript=[])

    assert outcome.env_reward is None
    assert outcome.done is True


def test_session_verify_calls_user_verifier():
    """A user verifier is called once with the sandbox and task; its result is returned."""
    from openenv.core.harness import VerifyResult

    seen = []

    def record_and_pass(sandbox, task):
        # Capture what the session hands to the verifier for the assertion below.
        seen.append((sandbox.sandbox_id, task.instruction))
        return VerifyResult(env_reward=1.0, done=True, metrics={"tests": "pass"})

    fake_sandbox = _FakeSandbox()
    session = OpenCodeSession(
        sandbox=fake_sandbox,
        config=_config(),
        task=OpenCodeTask(instruction="do"),
        verifier=record_and_pass,
    )

    outcome = session.verify(transcript=[])

    assert seen == [("fake-sbx", "do")]
    assert outcome.env_reward == 1.0
    assert outcome.metrics == {"tests": "pass"}


def test_session_close_kills_job_and_sandbox():
    """``close`` clears the session's background-job handle and kills the sandbox."""
    fake_sandbox = _FakeSandbox()
    task = OpenCodeTask(instruction="x")
    session = OpenCodeSession(sandbox=fake_sandbox, config=_config(), task=task)
    # Attach a job by hand, since the session was built without the factory.
    session._bg_job = _FakeBgJob()

    session.close()

    # NOTE(review): the job's own ``_killed`` flag is not checked here even
    # though the test name mentions killing the job -- confirm whether
    # ``close()`` is expected to call ``kill()`` on it and assert that if so.
    assert session._bg_job is None
    assert fake_sandbox.killed