| import asyncio |
| from types import SimpleNamespace |
| from pathlib import Path |
|
|
| import pytest |
|
|
| from agent.config import Config |
| from agent.core import agent_loop |
| from agent.core.agent_loop import _needs_approval |
| from agent.core.session import OpType |
| from agent.core.tools import create_builtin_tools |
| from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC |
| from agent.tools.sandbox_tool import get_sandbox_tools |
|
|
|
|
def test_default_cpu_sandbox_create_does_not_require_approval():
    """``sandbox_create`` with no hardware or ``cpu-basic`` skips approval.

    Checked with ``yolo_mode`` disabled, for both empty arguments and an
    explicit ``cpu-basic`` hardware request.
    """
    non_yolo_config = SimpleNamespace(yolo_mode=False)

    for tool_args in ({}, {"hardware": "cpu-basic"}):
        verdict = _needs_approval("sandbox_create", tool_args, non_yolo_config)
        assert verdict is False
|
|
|
|
def test_non_default_sandbox_create_still_requires_approval():
    """``sandbox_create`` for ``cpu-upgrade`` or ``t4-small`` still needs approval.

    Checked with ``yolo_mode`` disabled.
    """
    non_yolo_config = SimpleNamespace(yolo_mode=False)

    for hardware in ("cpu-upgrade", "t4-small"):
        tool_args = {"hardware": hardware}
        verdict = _needs_approval("sandbox_create", tool_args, non_yolo_config)
        assert verdict is True
|
|
|
|
def test_prompt_and_tool_specs_do_not_require_cpu_sandbox_create():
    """Check the prompt and sandbox tool descriptions for the CPU-sandbox wording.

    The system prompt must no longer contain the old
    "sandbox_create → install deps" step and must contain the two phrases
    about the already-available cpu-basic sandbox. The ``sandbox_create`` and
    ``bash`` tool descriptions must mention the automatic start.

    The prompt path is relative, so it is resolved against the current
    working directory (presumably the repository root -- confirm against the
    test runner configuration).
    """
    # Decode explicitly as UTF-8 instead of the locale default: the first
    # assertion searches for a non-ASCII arrow, and a mis-decoded file would
    # make that negative check pass vacuously (or fail to decode at all).
    # NOTE(review): assumes the YAML file is stored as UTF-8.
    prompt = Path("agent/prompts/system_prompt_v3.yaml").read_text(encoding="utf-8")
    tool_specs = {tool.name: tool.description for tool in get_sandbox_tools()}

    assert "sandbox_create → install deps" not in prompt
    assert "Do NOT call sandbox_create before normal CPU work" in prompt
    assert "cpu-basic sandbox is already available" in prompt

    assert (
        "cpu-basic sandbox is already started automatically"
        in tool_specs["sandbox_create"]
    )
    assert "started automatically for normal CPU work" in tool_specs["bash"]
|
|
|
|
def test_prompt_rejects_local_machine_paths_for_hf_jobs_scripts():
    """Check the system prompt wording about what may be passed as hf_jobs.script.

    The prompt must contain the rule against local machine paths, the
    ``/fsx/...`` example, and the two phrases about inline Python source code
    and a file already written in the session sandbox.

    The prompt path is relative, so it is resolved against the current
    working directory.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    # NOTE(review): assumes the YAML file is stored as UTF-8.
    prompt = Path("agent/prompts/system_prompt_v3.yaml").read_text(encoding="utf-8")

    expected_phrases = (
        "Never pass a local machine path to hf_jobs.script",
        "/fsx/...",
        "inline Python source code",
        "a file already written in the session sandbox",
    )
    for phrase in expected_phrases:
        assert phrase in prompt
|
|
|
|
def test_prompt_and_hf_jobs_spec_require_gpu_preflight_for_gpu_jobs():
    """Check the GPU-preflight wording in the prompt and the hf_jobs tool spec.

    Three phrases must appear in the system prompt and two in
    ``HF_JOBS_TOOL_SPEC["description"]``.

    The prompt path is relative, so it is resolved against the current
    working directory.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    # NOTE(review): assumes the YAML file is stored as UTF-8.
    prompt = Path("agent/prompts/system_prompt_v3.yaml").read_text(encoding="utf-8")
    jobs_description = HF_JOBS_TOOL_SPEC["description"]

    assert "GPU preflight is mandatory before hf_jobs" in prompt
    assert "GPU sandbox smoke test" in prompt
    assert "If you skip GPU sandbox preflight" in prompt
    assert "you MUST create a GPU sandbox with sandbox_create first" in jobs_description
    assert "If skipped, state why before calling hf_jobs" in jobs_description
|
|
|
|
def test_local_tool_runtime_excludes_sandbox_create():
    """Local-mode builtin tools include the file/shell tools but not ``sandbox_create``."""
    local_tools = create_builtin_tools(local_mode=True)
    names = {entry.name for entry in local_tools}

    required = {"bash", "read", "write", "edit"}
    assert required.issubset(names)
    assert "sandbox_create" not in names
|
|
|
|
def test_sandbox_tool_runtime_includes_sandbox_create():
    """Non-local builtin tools include ``sandbox_create`` and the file/shell tools."""
    sandbox_tools = create_builtin_tools(local_mode=False)
    names = {entry.name for entry in sandbox_tools}

    required = {"sandbox_create", "bash", "read", "write", "edit"}
    assert required.issubset(names)
|
|
|
|
@pytest.mark.asyncio
async def test_cli_sandbox_runtime_preloads_and_tears_down_sandbox(monkeypatch):
    """Run ``submission_loop`` with ``local_mode=False`` and check the sandbox hooks.

    ``start_cpu_sandbox_preload`` and ``teardown_session_sandbox`` on
    ``agent_loop`` are replaced with recording fakes. The test expects the
    preload hook to have been called with the session by the time the
    ``ready`` event is received, and the teardown hook to have been called
    with the same session once the loop task finishes after a SHUTDOWN
    operation.
    """
    started = []
    torn_down = []

    class FakeToolRouter:
        """Stand-in tool router: no tools, no specs, no-op async context manager."""

        tools = {}

        def get_tool_specs_for_llm(self):
            return []

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

    def fake_start_cpu_sandbox_preload(session):
        # Record the session instead of starting a real sandbox.
        started.append(session)
        return None

    async def fake_teardown_session_sandbox(session):
        # Record the session instead of tearing down a real sandbox.
        torn_down.append(session)

    monkeypatch.setattr(
        agent_loop, "start_cpu_sandbox_preload", fake_start_cpu_sandbox_preload
    )
    monkeypatch.setattr(
        agent_loop, "teardown_session_sandbox", fake_teardown_session_sandbox
    )

    submission_queue = asyncio.Queue()
    event_queue = asyncio.Queue()
    session_holder = [None]
    config = Config.model_validate(
        {"model_name": "openai/gpt-5.5", "save_sessions": False}
    )

    task = asyncio.create_task(
        agent_loop.submission_loop(
            submission_queue,
            event_queue,
            config=config,
            tool_router=FakeToolRouter(),
            session_holder=session_holder,
            hf_token="hf-token",
            user_id="tester",
            local_mode=False,
        )
    )

    try:
        ready = await asyncio.wait_for(event_queue.get(), timeout=1)
        assert ready.event_type == "ready"
        assert started == [session_holder[0]]
        assert session_holder[0].local_mode is False

        await submission_queue.put(
            SimpleNamespace(
                operation=SimpleNamespace(op_type=OpType.SHUTDOWN, data=None),
            )
        )
        await asyncio.wait_for(task, timeout=1)
    finally:
        # If waiting for "ready" timed out or an assertion above failed, the
        # loop task is still pending. Cancel and await it so it does not
        # outlive the test; the original failure is what gets reported.
        if not task.done():
            task.cancel()
            await asyncio.gather(task, return_exceptions=True)

    assert torn_down == [session_holder[0]]
|
|