from types import SimpleNamespace
import pytest
from agent.config import Config
from agent.core import agent_loop
from agent.core.cost_estimation import CostEstimate
def _config(**overrides):
    """Return a validated ``Config`` built from baseline test settings.

    Any keyword argument replaces (or extends) the baseline value of the
    same name before the mapping is handed to ``Config.model_validate``.
    """
    settings = dict(
        model_name="moonshotai/Kimi-K2.6",
        confirm_cpu_jobs=True,
        auto_file_upload=False,
        yolo_mode=False,
    )
    # Overrides are applied last so callers always win over the baseline.
    settings.update(overrides)
    return Config.model_validate(settings)
def _session(*, cap=5.0, spent=0.0, enabled=True):
    """Create a lightweight stand-in session object for the approval tests.

    Args:
        cap: Value stored as ``auto_approval_cost_cap_usd``.
        spent: Value stored as ``auto_approval_estimated_spend_usd``.
        enabled: Value stored as ``auto_approval_enabled``.

    Returns:
        A ``SimpleNamespace`` carrying a default config from ``_config()``,
        the three auto-approval fields above, and ``sandbox=None``.
    """
    fields = {
        "config": _config(),
        "auto_approval_enabled": enabled,
        "auto_approval_cost_cap_usd": cap,
        "auto_approval_estimated_spend_usd": spent,
        "sandbox": None,
    }
    return SimpleNamespace(**fields)
@pytest.mark.asyncio
async def test_session_yolo_auto_approves_non_costed_approval_tool():
    """An ``hf_repo_files`` upload is auto-approved for a default test session."""
    tool_name = "hf_repo_files"
    tool_args = {"operation": "upload", "path": "README.md"}

    outcome = await agent_loop._approval_decision(tool_name, tool_args, _session())

    assert outcome.requires_approval is False
    assert outcome.auto_approved is True
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "operation",
    # Each scheduled operation is listed once; the original list repeated
    # "scheduled run", which only re-ran an identical case.
    ["scheduled run", "scheduled uv"],
)
async def test_scheduled_hf_jobs_always_require_manual_approval(operation):
    """Scheduled ``hf_jobs`` operations need manual approval even in yolo mode.

    The session config has ``yolo_mode`` switched on, and the test asserts
    that the decision still requires approval, reports auto-approval as
    blocked with a reason mentioning "Scheduled HF jobs", and that
    ``_needs_approval`` is truthy for the same operation.
    """
    session = _session()
    # Turn on config-level yolo mode to show it does not bypass the block.
    session.config.yolo_mode = True

    decision = await agent_loop._approval_decision(
        "hf_jobs",
        {"operation": operation, "script": "print(1)"},
        session,
    )

    assert decision.requires_approval is True
    assert decision.auto_approval_blocked is True
    assert "Scheduled HF jobs" in decision.block_reason
    assert agent_loop._needs_approval(
        "hf_jobs", {"operation": operation}, session.config
    )
@pytest.mark.asyncio
async def test_immediate_hf_job_under_cap_auto_runs(monkeypatch):
    """A $2 job with $1 already spent against a $5 cap is auto-approved."""

    async def two_dollar_estimate(*_args, **_kwargs):
        # Stubbed estimator: every call reports a billable $2.00 cost.
        return CostEstimate(estimated_cost_usd=2.0, billable=True)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", two_dollar_estimate)

    job_args = {"operation": "run", "hardware_flavor": "a10g-large", "timeout": "1h"}
    session = _session(cap=5.0, spent=1.0)
    outcome = await agent_loop._approval_decision("hf_jobs", job_args, session)

    assert outcome.requires_approval is False
    assert outcome.auto_approved is True
    assert outcome.estimated_cost_usd == 2.0
@pytest.mark.asyncio
async def test_immediate_hf_job_over_cap_falls_back_to_approval(monkeypatch):
    """A $2 job with $4 already spent against a $5 cap needs manual approval."""

    async def two_dollar_estimate(*_args, **_kwargs):
        # Stubbed estimator: every call reports a billable $2.00 cost.
        return CostEstimate(estimated_cost_usd=2.0, billable=True)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", two_dollar_estimate)

    job_args = {"operation": "run", "hardware_flavor": "a10g-large", "timeout": "1h"}
    session = _session(cap=5.0, spent=4.0)
    outcome = await agent_loop._approval_decision("hf_jobs", job_args, session)

    assert outcome.requires_approval is True
    assert outcome.auto_approval_blocked is True
    assert "exceeds" in outcome.block_reason
    # Only $1 of the cap is left, which is what the decision should report.
    assert outcome.remaining_cap_usd == 1.0
@pytest.mark.asyncio
async def test_unknown_cost_falls_back_to_approval(monkeypatch):
    """When the estimator returns no price, the call requires manual approval."""

    async def unpriced_estimate(*_args, **_kwargs):
        # Stubbed estimator: billable, but with no cost figure available.
        estimate_fields = {
            "estimated_cost_usd": None,
            "billable": True,
            "block_reason": "No price is available.",
        }
        return CostEstimate(**estimate_fields)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", unpriced_estimate)

    outcome = await agent_loop._approval_decision(
        "sandbox_create", {"hardware": "mystery-gpu"}, _session()
    )

    assert outcome.requires_approval is True
    assert outcome.auto_approval_blocked is True
    assert outcome.estimated_cost_usd is None
@pytest.mark.asyncio
async def test_batch_reservation_blocks_second_over_budget_job(monkeypatch):
    """Reserving the first job's cost pushes an identical second job over the cap.

    Both jobs are estimated at $3 against a $5 cap with nothing spent. The
    second decision is made with the first job's estimate passed as
    ``reserved_spend_usd``.
    """

    async def three_dollar_estimate(*_args, **_kwargs):
        # Stubbed estimator: every call reports a billable $3.00 cost.
        return CostEstimate(estimated_cost_usd=3.0, billable=True)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", three_dollar_estimate)

    session = _session(cap=5.0, spent=0.0)

    first = await agent_loop._approval_decision(
        "hf_jobs",
        {"operation": "run", "hardware_flavor": "a10g-large"},
        session,
        reserved_spend_usd=0.0,
    )
    # Carry the first job's estimate forward as already-reserved budget.
    already_reserved = first.estimated_cost_usd or 0.0
    second = await agent_loop._approval_decision(
        "hf_jobs",
        {"operation": "run", "hardware_flavor": "a10g-large"},
        session,
        reserved_spend_usd=already_reserved,
    )

    assert first.requires_approval is False
    assert second.requires_approval is True
    assert second.remaining_cap_usd == 2.0
@pytest.mark.asyncio
async def test_manual_approval_does_not_record_spend_when_session_yolo_disabled(
    monkeypatch,
):
    """With auto-approval disabled, no estimate is requested and spend stays 0."""
    estimator_calls = []

    async def recording_estimate(*_args, **_kwargs):
        # Record the invocation so the test can prove it never happened.
        estimator_calls.append(True)
        return CostEstimate(estimated_cost_usd=2.0, billable=True)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", recording_estimate)

    session = _session(enabled=False, cap=5.0, spent=0.0)
    await agent_loop._record_manual_approved_spend_if_needed(
        session, "sandbox_create", {"hardware": "a10g-large"}
    )

    assert estimator_calls == []
    assert session.auto_approval_estimated_spend_usd == 0.0
@pytest.mark.asyncio
async def test_manual_approval_records_spend_when_session_yolo_enabled(monkeypatch):
    """With auto-approval enabled, the estimated cost is added to session spend."""

    async def fixed_estimate(*_args, **_kwargs):
        # Stubbed estimator: every call reports a billable $1.25 cost.
        return CostEstimate(estimated_cost_usd=1.25, billable=True)

    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fixed_estimate)

    session = _session(enabled=True, cap=5.0, spent=0.5)
    await agent_loop._record_manual_approved_spend_if_needed(
        session, "sandbox_create", {"hardware": "a10g-large"}
    )

    # 0.5 already spent + 1.25 estimated for the manually approved call.
    assert session.auto_approval_estimated_spend_usd == 1.75
|