ml-intern

Sleeping

App Files Files Community

lewtun HF Staff Codex committed on May 1

Commit

77324b8

unverified ·

1 Parent(s): 7599843

Add session YOLO auto-approval budget (#201)

Browse files

* Add session YOLO auto-approval budget

Co-authored-by: Codex <codex@openai.com>

* Address YOLO approval review feedback

Co-authored-by: Codex <codex@openai.com>

---------

Co-authored-by: Codex <codex@openai.com>

Files changed (22) hide show

agent/core/agent_loop.py +244 -26
agent/core/approval_policy.py +11 -0
agent/core/cost_estimation.py +278 -0
agent/core/session.py +37 -0
agent/core/session_persistence.py +12 -0
agent/main.py +28 -6
backend/models.py +19 -0
backend/routes/agent.py +21 -0
backend/session_manager.py +94 -0
frontend/src/components/Chat/ToolCallGroup.tsx +20 -1
frontend/src/components/Layout/AppLayout.tsx +2 -0
frontend/src/components/YoloControl.tsx +155 -0
frontend/src/hooks/useAgentChat.ts +27 -2
frontend/src/lib/sse-chat-transport.ts +13 -1
frontend/src/store/agentStore.ts +33 -0
frontend/src/store/sessionStore.ts +45 -0
frontend/src/types/agent.ts +4 -0
frontend/src/types/events.ts +4 -0
tests/unit/test_agent_model_gating.py +45 -0
tests/unit/test_auto_approval_policy.py +185 -0
tests/unit/test_cost_estimation.py +58 -0
tests/unit/test_session_manager_persistence.py +73 -0

agent/core/agent_loop.py CHANGED Viewed

@@ -19,6 +19,11 @@ from litellm import (
 from litellm.exceptions import ContextWindowExceededError
 from agent.config import Config
 from agent.messaging.gateway import NotificationGateway
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
@@ -110,13 +115,39 @@ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
     return True, None
-def _needs_approval(
     tool_name: str, tool_args: dict, config: Config | None = None
 ) -> bool:
-    """Check if a tool call requires user approval before execution."""
-    # Yolo mode: skip all approvals
-    if config and config.yolo_mode:
-        return False
     # If args are malformed, skip approval (validation error will be shown later)
     args_valid, _ = _validate_tool_args(tool_args)
@@ -127,8 +158,10 @@ def _needs_approval(
         return True
     if tool_name == "hf_jobs":
-        operation = tool_args.get("operation", "")
-        if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
             return False
         # Check if this is a CPU-only job
@@ -180,6 +213,143 @@ def _needs_approval(
     return False
 # -- LLM retry constants --------------------------------------------------
 _MAX_LLM_RETRIES = 3
 _LLM_RETRY_DELAYS = [5, 15, 30]  # seconds between retries
@@ -1063,29 +1233,49 @@ class Handlers:
                 if session.is_cancelled:
                     break
-                # Separate good tools into approval-required vs auto-execute
-                approval_required_tools: list[tuple[ToolCall, str, dict]] = []
-                non_approval_tools: list[tuple[ToolCall, str, dict]] = []
                 for tc, tool_name, tool_args in good_tools:
-                    if _needs_approval(tool_name, tool_args, session.config):
-                        approval_required_tools.append((tc, tool_name, tool_args))
                     else:
-                        non_approval_tools.append((tc, tool_name, tool_args))
                 # Execute non-approval tools (in parallel when possible)
                 if non_approval_tools:
                     # 1. Validate args upfront
                     parsed_tools: list[
-                        tuple[ToolCall, str, dict, bool, str]
                     ] = []
-                    for tc, tool_name, tool_args in non_approval_tools:
                         args_valid, error_msg = _validate_tool_args(tool_args)
                         parsed_tools.append(
-                            (tc, tool_name, tool_args, args_valid, error_msg)
                         )
                     # 2. Send all tool_call events upfront (so frontend shows them all)
-                    for tc, tool_name, tool_args, args_valid, _ in parsed_tools:
                         if args_valid:
                             await session.send_event(
                                 Event(
@@ -1103,11 +1293,14 @@ class Handlers:
                         tc: ToolCall,
                         name: str,
                         args: dict,
                         valid: bool,
                         err: str,
                     ) -> tuple[ToolCall, str, dict, str, bool]:
                         if not valid:
                             return (tc, name, args, err, False)
                         out, ok = await session.tool_router.call_tool(
                             name, args, session=session, tool_call_id=tc.id
                         )
@@ -1115,8 +1308,8 @@ class Handlers:
                     gather_task = asyncio.ensure_future(asyncio.gather(
                         *[
-                            _exec_tool(tc, name, args, valid, err)
-                            for tc, name, args, valid, err in parsed_tools
                         ]
                     ))
                     cancel_task = asyncio.ensure_future(session._cancelled.wait())
@@ -1133,7 +1326,7 @@ class Handlers:
                         except asyncio.CancelledError:
                             pass
                         # Notify frontend that in-flight tools were cancelled
-                        for tc, name, _args, valid, _ in parsed_tools:
                             if valid:
                                 await session.send_event(Event(
                                     event_type="tool_state_change",
@@ -1171,7 +1364,8 @@ class Handlers:
                 if approval_required_tools:
                     # Prepare batch approval data
                     tools_data = []
-                    for tc, tool_name, tool_args in approval_required_tools:
                         # Resolve sandbox file paths for hf_jobs scripts so the
                         # frontend can display & edit the actual file content.
                         if tool_name == "hf_jobs" and isinstance(tool_args.get("script"), str):
@@ -1181,20 +1375,42 @@ class Handlers:
                             if resolved:
                                 tool_args = {**tool_args, "script": resolved}
-                        tools_data.append({
                             "tool": tool_name,
                             "arguments": tool_args,
                             "tool_call_id": tc.id,
-                        })
                     await session.send_event(Event(
                         event_type="approval_required",
-                        data={"tools": tools_data, "count": len(tools_data)},
                     ))
                     # Store all approval-requiring tools (ToolCall objects for execution)
                     session.pending_approval = {
-                        "tool_calls": [tc for tc, _, _ in approval_required_tools],
                     }
                     # Return early - wait for EXEC_APPROVAL operation
@@ -1384,6 +1600,8 @@ class Handlers:
                 )
             )
             output, success = await session.tool_router.call_tool(
                 tool_name, tool_args, session=session, tool_call_id=tc.id
             )

 from litellm.exceptions import ContextWindowExceededError
 from agent.config import Config
+from agent.core.approval_policy import (
+    is_scheduled_operation,
+    normalize_tool_operation,
+)
+from agent.core.cost_estimation import CostEstimate, estimate_tool_cost
 from agent.messaging.gateway import NotificationGateway
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
     return True, None
+_IMMEDIATE_HF_JOB_RUNS = {"run", "uv"}
+@dataclass(frozen=True)
+class ApprovalDecision:
+    requires_approval: bool
+    auto_approved: bool = False
+    auto_approval_blocked: bool = False
+    block_reason: str | None = None
+    estimated_cost_usd: float | None = None
+    remaining_cap_usd: float | None = None
+    billable: bool = False
def _operation(tool_args: dict) -> str:
    """Return the normalized ``operation`` entry of a tool-call argument dict."""
    raw_operation = tool_args.get("operation")
    return normalize_tool_operation(raw_operation)
def _is_immediate_hf_job_run(tool_name: str, tool_args: dict) -> bool:
    """Tell whether the call is an ``hf_jobs`` operation that runs immediately."""
    if tool_name != "hf_jobs":
        return False
    return _operation(tool_args) in _IMMEDIATE_HF_JOB_RUNS
def _is_scheduled_hf_job_run(tool_name: str, tool_args: dict) -> bool:
    """Tell whether the call is an ``hf_jobs`` operation of the scheduled kind."""
    if tool_name != "hf_jobs":
        return False
    return is_scheduled_operation(_operation(tool_args))
def _is_budgeted_auto_approval_target(tool_name: str, tool_args: dict) -> bool:
    """Tell whether the call is one that the auto-approval budget applies to.

    That is either creating a sandbox or an immediate ``hf_jobs`` run.
    """
    if tool_name == "sandbox_create":
        return True
    return _is_immediate_hf_job_run(tool_name, tool_args)
+def _base_needs_approval(
     tool_name: str, tool_args: dict, config: Config | None = None
 ) -> bool:
+    """Check if a tool call requires approval before YOLO policy is applied."""
     # If args are malformed, skip approval (validation error will be shown later)
     args_valid, _ = _validate_tool_args(tool_args)
         return True
     if tool_name == "hf_jobs":
+        operation = _operation(tool_args)
+        if is_scheduled_operation(operation):
+            return True
+        if operation not in _IMMEDIATE_HF_JOB_RUNS:
             return False
         # Check if this is a CPU-only job
     return False
def _needs_approval(
    tool_name: str, tool_args: dict, config: Config | None = None
) -> bool:
    """Synchronous approval check kept for tests and CLI display helpers.

    Scheduled HF jobs always need approval. Otherwise ``config.yolo_mode``
    waives approval entirely, and without it the base policy decides.
    """
    if not _is_scheduled_hf_job_run(tool_name, tool_args):
        if config and config.yolo_mode:
            return False
        return _base_needs_approval(tool_name, tool_args, config)
    # Scheduled runs are never bypassed, not even by yolo_mode.
    return True
def _session_auto_approval_enabled(session: Session | None) -> bool:
    """Return True when a session exists and has its auto-approval flag set."""
    if not session:
        return False
    # getattr with a default tolerates session objects that lack the attribute.
    return bool(getattr(session, "auto_approval_enabled", False))
def _effective_yolo_enabled(session: Session | None, config: Config | None) -> bool:
    """Return True when either ``config.yolo_mode`` or session auto-approval is on."""
    if config and config.yolo_mode:
        return True
    return _session_auto_approval_enabled(session)
def _remaining_budget_after_reservations(
    session: Session | None, reserved_spend_usd: float
) -> float | None:
    """Compute how much of the session's cost cap is still unclaimed.

    Returns ``None`` when there is no session or the session has no cap.
    Otherwise returns cap minus recorded spend minus ``reserved_spend_usd``,
    floored at zero and rounded to four decimals.
    """
    if not session:
        return None
    if getattr(session, "auto_approval_cost_cap_usd", None) is None:
        return None
    cap_usd = float(getattr(session, "auto_approval_cost_cap_usd") or 0.0)
    spent_usd = float(
        getattr(session, "auto_approval_estimated_spend_usd", 0.0) or 0.0
    )
    headroom_usd = cap_usd - spent_usd - reserved_spend_usd
    return round(max(0.0, headroom_usd), 4)
def _budget_block_reason(
    estimate: CostEstimate,
    *,
    remaining_cap_usd: float | None,
) -> str | None:
    """Explain why an estimate cannot be auto-approved, or return ``None``.

    A missing estimate is always a reason (the estimate's own message is
    preferred). A known estimate is a reason only when a cap is given and the
    estimate is larger than it.
    """
    cost_usd = estimate.estimated_cost_usd
    if cost_usd is None:
        return estimate.block_reason or "Could not estimate the cost safely."
    over_cap = remaining_cap_usd is not None and cost_usd > remaining_cap_usd
    if not over_cap:
        return None
    return (
        f"Estimated cost ${cost_usd:.2f} exceeds "
        f"remaining YOLO cap ${remaining_cap_usd:.2f}."
    )
async def _approval_decision(
    tool_name: str,
    tool_args: dict,
    session: Session,
    *,
    reserved_spend_usd: float = 0.0,
) -> ApprovalDecision:
    """Classify one parsed tool call as manual, auto-approved, or unrestricted.

    ``reserved_spend_usd`` is subtracted from the session's remaining cap
    before the budget check.
    """
    config = session.config
    needs_manual = _base_needs_approval(tool_name, tool_args, config)

    # Scheduled jobs are recurring/unbounded enough that no YOLO flavour,
    # legacy config.yolo_mode included, may skip the human confirmation.
    if _is_scheduled_hf_job_run(tool_name, tool_args):
        return ApprovalDecision(
            requires_approval=True,
            auto_approval_blocked=_effective_yolo_enabled(session, config),
            block_reason="Scheduled HF jobs always require manual approval.",
        )

    yolo_on = _effective_yolo_enabled(session, config)
    is_budgeted = _is_budgeted_auto_approval_target(tool_name, tool_args)
    session_yolo_on = _session_auto_approval_enabled(session)

    # The cost cap is a session-scoped policy. Legacy config.yolo_mode stays
    # uncapped, so anything outside the budgeted path is decided here.
    if not (yolo_on and is_budgeted and session_yolo_on):
        if needs_manual and yolo_on:
            return ApprovalDecision(requires_approval=False, auto_approved=True)
        return ApprovalDecision(requires_approval=needs_manual)

    estimate = await estimate_tool_cost(tool_name, tool_args, session=session)
    remaining = _remaining_budget_after_reservations(session, reserved_spend_usd)
    cost_fields = {
        "estimated_cost_usd": estimate.estimated_cost_usd,
        "remaining_cap_usd": remaining,
        "billable": estimate.billable,
    }

    reason = _budget_block_reason(estimate, remaining_cap_usd=remaining)
    if reason:
        # Unknown or over-budget cost: hand the decision back to the user.
        return ApprovalDecision(
            requires_approval=True,
            auto_approval_blocked=True,
            block_reason=reason,
            **cost_fields,
        )
    if needs_manual:
        return ApprovalDecision(
            requires_approval=False, auto_approved=True, **cost_fields
        )
    return ApprovalDecision(requires_approval=False, **cost_fields)
def _record_estimated_spend(session: Session, decision: ApprovalDecision) -> None:
    """Add a decision's estimated cost to the session's running spend.

    Nothing is recorded for non-billable decisions or decisions without an
    estimate. Sessions lacking ``add_auto_approval_estimated_spend`` get the
    attribute updated directly instead.
    """
    cost_usd = decision.estimated_cost_usd
    if not decision.billable or cost_usd is None:
        return
    if not hasattr(session, "add_auto_approval_estimated_spend"):
        # Fallback for session objects that do not provide the helper method.
        previous_usd = float(
            getattr(session, "auto_approval_estimated_spend_usd", 0.0) or 0.0
        )
        session.auto_approval_estimated_spend_usd = round(
            previous_usd + float(cost_usd), 4
        )
        return
    session.add_auto_approval_estimated_spend(cost_usd)
async def _record_manual_approved_spend_if_needed(
    session: Session,
    tool_name: str,
    tool_args: dict,
) -> None:
    """Record the estimated cost of a budgeted tool call against the session.

    Does nothing unless session auto-approval is enabled and the call is a
    budgeted target (sandbox creation or an immediate HF job run).
    """
    tracked = _session_auto_approval_enabled(
        session
    ) and _is_budgeted_auto_approval_target(tool_name, tool_args)
    if not tracked:
        return
    estimate = await estimate_tool_cost(tool_name, tool_args, session=session)
    spend_record = ApprovalDecision(
        requires_approval=False,
        billable=estimate.billable,
        estimated_cost_usd=estimate.estimated_cost_usd,
    )
    _record_estimated_spend(session, spend_record)
 # -- LLM retry constants --------------------------------------------------
 _MAX_LLM_RETRIES = 3
 _LLM_RETRY_DELAYS = [5, 15, 30]  # seconds between retries
                 if session.is_cancelled:
                     break
+                # Separate good tools into approval-required vs auto-execute.
+                # Track reserved spend while classifying a batch so two
+                # auto-approved jobs in one model response cannot jointly
+                # exceed the remaining session cap.
+                approval_required_tools: list[
+                    tuple[ToolCall, str, dict, ApprovalDecision]
+                ] = []
+                non_approval_tools: list[
+                    tuple[ToolCall, str, dict, ApprovalDecision]
+                ] = []
+                reserved_auto_spend_usd = 0.0
                 for tc, tool_name, tool_args in good_tools:
+                    decision = await _approval_decision(
+                        tool_name,
+                        tool_args,
+                        session,
+                        reserved_spend_usd=reserved_auto_spend_usd,
+                    )
+                    if decision.requires_approval:
+                        approval_required_tools.append((tc, tool_name, tool_args, decision))
                     else:
+                        non_approval_tools.append((tc, tool_name, tool_args, decision))
+                        if (
+                            decision.auto_approved
+                            and decision.billable
+                            and decision.estimated_cost_usd is not None
+                        ):
+                            reserved_auto_spend_usd += decision.estimated_cost_usd
                 # Execute non-approval tools (in parallel when possible)
                 if non_approval_tools:
                     # 1. Validate args upfront
                     parsed_tools: list[
+                        tuple[ToolCall, str, dict, ApprovalDecision, bool, str]
                     ] = []
+                    for tc, tool_name, tool_args, decision in non_approval_tools:
                         args_valid, error_msg = _validate_tool_args(tool_args)
                         parsed_tools.append(
+                            (tc, tool_name, tool_args, decision, args_valid, error_msg)
                         )
                     # 2. Send all tool_call events upfront (so frontend shows them all)
+                    for tc, tool_name, tool_args, _decision, args_valid, _ in parsed_tools:
                         if args_valid:
                             await session.send_event(
                                 Event(
                         tc: ToolCall,
                         name: str,
                         args: dict,
+                        decision: ApprovalDecision,
                         valid: bool,
                         err: str,
                     ) -> tuple[ToolCall, str, dict, str, bool]:
                         if not valid:
                             return (tc, name, args, err, False)
+                        if decision.billable:
+                            _record_estimated_spend(session, decision)
                         out, ok = await session.tool_router.call_tool(
                             name, args, session=session, tool_call_id=tc.id
                         )
                     gather_task = asyncio.ensure_future(asyncio.gather(
                         *[
+                            _exec_tool(tc, name, args, decision, valid, err)
+                            for tc, name, args, decision, valid, err in parsed_tools
                         ]
                     ))
                     cancel_task = asyncio.ensure_future(session._cancelled.wait())
                         except asyncio.CancelledError:
                             pass
                         # Notify frontend that in-flight tools were cancelled
+                        for tc, name, _args, _decision, valid, _ in parsed_tools:
                             if valid:
                                 await session.send_event(Event(
                                     event_type="tool_state_change",
                 if approval_required_tools:
                     # Prepare batch approval data
                     tools_data = []
+                    blocked_payloads = []
+                    for tc, tool_name, tool_args, decision in approval_required_tools:
                         # Resolve sandbox file paths for hf_jobs scripts so the
                         # frontend can display & edit the actual file content.
                         if tool_name == "hf_jobs" and isinstance(tool_args.get("script"), str):
                             if resolved:
                                 tool_args = {**tool_args, "script": resolved}
+                        tool_payload = {
                             "tool": tool_name,
                             "arguments": tool_args,
                             "tool_call_id": tc.id,
+                        }
+                        if decision.auto_approval_blocked:
+                            tool_payload.update(
+                                {
+                                    "auto_approval_blocked": True,
+                                    "block_reason": decision.block_reason,
+                                    "estimated_cost_usd": decision.estimated_cost_usd,
+                                    "remaining_cap_usd": decision.remaining_cap_usd,
+                                }
+                            )
+                            blocked_payloads.append(tool_payload)
+                        tools_data.append(tool_payload)
+                    event_data = {"tools": tools_data, "count": len(tools_data)}
+                    if blocked_payloads:
+                        first = blocked_payloads[0]
+                        event_data.update(
+                            {
+                                "auto_approval_blocked": True,
+                                "block_reason": first.get("block_reason"),
+                                "estimated_cost_usd": first.get("estimated_cost_usd"),
+                                "remaining_cap_usd": first.get("remaining_cap_usd"),
+                            }
+                        )
                     await session.send_event(Event(
                         event_type="approval_required",
+                        data=event_data,
                     ))
                     # Store all approval-requiring tools (ToolCall objects for execution)
                     session.pending_approval = {
+                        "tool_calls": [tc for tc, _, _, _ in approval_required_tools],
                     }
                     # Return early - wait for EXEC_APPROVAL operation
                 )
             )
+            await _record_manual_approved_spend_if_needed(session, tool_name, tool_args)
             output, success = await session.tool_router.call_tool(
                 tool_name, tool_args, session=session, tool_call_id=tc.id
             )

agent/core/approval_policy.py ADDED Viewed

	@@ -0,0 +1,11 @@

+"""Shared predicates for approval-gated tool operations."""
+from typing import Any
def normalize_tool_operation(operation: Any) -> str:
    """Return ``operation`` as a trimmed, lower-cased string.

    Any falsy input (``None``, ``""``, ``0``, ...) normalizes to ``""``.
    """
    if not operation:
        return ""
    return str(operation).strip().lower()
def is_scheduled_operation(operation: Any) -> bool:
    """Tell whether the normalized operation begins with ``"scheduled "``."""
    prefix = "scheduled "
    normalized = normalize_tool_operation(operation)
    return normalized[: len(prefix)] == prefix

agent/core/cost_estimation.py ADDED Viewed

	@@ -0,0 +1,278 @@

+"""Conservative cost estimates for auto-approved infrastructure actions."""
+import os
+import re
+import time
+from dataclasses import dataclass
+from typing import Any
+import httpx
+OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
+JOBS_HARDWARE_URL = f"{OPENID_PROVIDER_URL}/api/jobs/hardware"
+JOBS_PRICE_CACHE_TTL_S = 6 * 60 * 60
+DEFAULT_JOB_TIMEOUT_HOURS = 0.5
+DEFAULT_SANDBOX_RESERVATION_HOURS = 1.0
+# Static fallback prices are intentionally conservative enough for a budget
+# guard. The live /api/jobs/hardware catalog wins whenever it is reachable.
+HF_JOBS_PRICE_USD_PER_HOUR: dict[str, float] = {
+    "cpu-basic": 0.05,
+    "cpu-upgrade": 0.25,
+    "cpu-performance": 0.50,
+    "cpu-xl": 1.00,
+    "t4-small": 0.60,
+    "t4-medium": 0.90,
+    "l4x1": 1.00,
+    "l4x4": 4.00,
+    "l40sx1": 2.00,
+    "l40sx4": 8.00,
+    "l40sx8": 16.00,
+    "a10g-small": 1.00,
+    "a10g-large": 2.00,
+    "a10g-largex2": 4.00,
+    "a10g-largex4": 8.00,
+    "a100-large": 4.00,
+    "a100x4": 16.00,
+    "a100x8": 32.00,
+    "h200": 10.00,
+    "h200x2": 20.00,
+    "h200x4": 40.00,
+    "h200x8": 80.00,
+    "inf2x6": 6.00,
+}
+SPACE_PRICE_USD_PER_HOUR: dict[str, float] = {
+    "cpu-basic": 0.0,
+    "cpu-upgrade": 0.05,
+    "cpu-performance": 0.50,
+    "cpu-xl": 1.00,
+    "t4-small": 0.60,
+    "t4-medium": 0.90,
+    "l4x1": 1.00,
+    "l4x4": 4.00,
+    "l40sx1": 2.00,
+    "l40sx4": 8.00,
+    "l40sx8": 16.00,
+    "a10g-small": 1.00,
+    "a10g-large": 2.00,
+    "a10g-largex2": 4.00,
+    "a10g-largex4": 8.00,
+    "a100-large": 4.00,
+    "a100x4": 16.00,
+    "a100x8": 32.00,
+    "h200": 10.00,
+    "h200x2": 20.00,
+    "h200x4": 40.00,
+    "h200x8": 80.00,
+    "inf2x6": 6.00,
+}
+_DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([smhd]?)\s*$", re.IGNORECASE)
+_PRICE_RE = re.compile(r"(\d+(?:\.\d+)?)")
+_jobs_price_cache: tuple[float, dict[str, float]] | None = None
+@dataclass(frozen=True)
+class CostEstimate:
+    """Estimated cost for a tool call.
+    ``estimated_cost_usd=None`` means the call may be billable but we could not
+    estimate it safely, so auto-approval should fall back to a human decision.
+    """
+    estimated_cost_usd: float | None
+    billable: bool
+    block_reason: str | None = None
+    label: str | None = None
def parse_timeout_hours(value: Any, *, default_hours: float = DEFAULT_JOB_TIMEOUT_HOURS) -> float | None:
    """Parse an HF timeout value into a finite, positive number of hours.

    Strings accept an optional ``s``, ``m``, ``h``, or ``d`` suffix (seconds
    when omitted). Numeric values are treated as seconds, matching the Hub
    client's typed timeout parameter.

    Args:
        value: Raw timeout taken from the tool arguments.
        default_hours: Result used when ``value`` is ``None`` or ``""``.

    Returns:
        The duration in hours, or ``None`` when the value cannot be
        interpreted as a finite positive duration. Callers treat ``None`` as
        "cannot estimate", so anything suspicious must map to ``None`` rather
        than raise or yield an infinite duration.
    """
    infinity = float("inf")

    if value is None or value == "":
        return default_hours
    # bool is a subclass of int; True/False are never meaningful timeouts.
    if isinstance(value, bool):
        return None
    if isinstance(value, int | float):
        try:
            seconds = float(value)
        except OverflowError:
            # An int too large for a float is not a usable timeout.
            return None
        # The chained comparison also rejects NaN, which compares False.
        return seconds / 3600 if 0 < seconds < infinity else None
    if not isinstance(value, str):
        return None

    match = _DURATION_RE.match(value)
    if match is None:
        return None
    amount = float(match.group(1))
    if not 0 < amount < infinity:
        return None

    unit = match.group(2).lower() or "s"
    if unit == "s":
        hours = amount / 3600
    elif unit == "m":
        hours = amount / 60
    elif unit == "h":
        hours = amount
    elif unit == "d":
        hours = amount * 24
    else:
        return None
    # Multiplying by 24 can overflow to infinity for absurdly large inputs.
    return hours if hours < infinity else None
+def _extract_flavor(item: dict[str, Any]) -> str | None:
+    for key in ("flavor", "name", "id", "value", "hardware", "hardware_flavor"):
+        value = item.get(key)
+        if isinstance(value, str) and value:
+            return value
+    return None
+def _coerce_price(value: Any) -> float | None:
+    if isinstance(value, bool) or value is None:
+        return None
+    if isinstance(value, int | float):
+        return float(value) if value >= 0 else None
+    if isinstance(value, str):
+        match = _PRICE_RE.search(value.replace(",", ""))
+        if match:
+            return float(match.group(1))
+    return None
def _extract_hourly_price(item: dict[str, Any]) -> float | None:
    """Find a price in ``item``, looking at direct fields before nested ones.

    Direct price keys are tried in order and the first one that coerces to a
    price wins. Failing that, dict values under ``pricing``, ``billing``, and
    ``cost`` are searched recursively in that order.
    """
    direct_keys = (
        "price",
        "price_usd",
        "priceUsd",
        "price_per_hour",
        "pricePerHour",
        "hourly_price",
        "hourlyPrice",
        "usd_per_hour",
        "usdPerHour",
    )
    container_keys = ("pricing", "billing", "cost")

    for field in direct_keys:
        direct = _coerce_price(item.get(field))
        if direct is not None:
            return direct

    for field in container_keys:
        container = item.get(field)
        if not isinstance(container, dict):
            continue
        nested_price = _extract_hourly_price(container)
        if nested_price is not None:
            return nested_price

    return None
def _iter_hardware_items(payload: Any):
    """Yield every dict in ``payload`` that carries a flavor identifier.

    Lists are walked element by element. A dict is yielded itself when it has
    a flavor, after which its known container keys are descended into. Any
    other payload type yields nothing.
    """
    if isinstance(payload, dict):
        if _extract_flavor(payload):
            yield payload
        for container_key in ("hardware", "flavors", "items", "data", "jobs"):
            nested = payload.get(container_key)
            if nested is None:
                continue
            yield from _iter_hardware_items(nested)
    elif isinstance(payload, list):
        for entry in payload:
            yield from _iter_hardware_items(entry)
def _parse_jobs_price_catalog(payload: Any) -> dict[str, float]:
    """Build a flavor -> hourly price mapping from a hardware catalog payload.

    Entries without a flavor or without a usable price are dropped; when a
    flavor appears more than once, the last occurrence wins.
    """
    flavor_price_pairs = (
        (_extract_flavor(entry), _extract_hourly_price(entry))
        for entry in _iter_hardware_items(payload)
    )
    return {
        flavor: hourly
        for flavor, hourly in flavor_price_pairs
        if flavor and hourly is not None
    }
# How long a static-fallback catalog is reused before the live endpoint is
# retried, in seconds. Much shorter than JOBS_PRICE_CACHE_TTL_S so a transient
# outage does not pin fallback prices for hours.
_JOBS_PRICE_FALLBACK_TTL_S = 5 * 60


async def hf_jobs_price_catalog() -> dict[str, float]:
    """Return HF Jobs hourly prices, preferring the live catalog.

    A cached catalog is reused while it is fresh. Otherwise the live endpoint
    is queried; live prices are layered over the static table so flavors the
    endpoint omits keep a price. If the request fails or yields no prices, the
    static table alone is returned and cached only briefly so the next call
    after ``_JOBS_PRICE_FALLBACK_TTL_S`` retries the live endpoint.

    Returns:
        A fresh dict (safe for the caller to mutate) mapping flavor to USD
        per hour.
    """
    global _jobs_price_cache
    now = time.monotonic()
    if _jobs_price_cache and now - _jobs_price_cache[0] < JOBS_PRICE_CACHE_TTL_S:
        return dict(_jobs_price_cache[1])

    live_prices: dict[str, float] = {}
    try:
        async with httpx.AsyncClient(timeout=3.0) as client:
            response = await client.get(JOBS_HARDWARE_URL)
            if response.status_code == 200:
                live_prices = _parse_jobs_price_catalog(response.json())
    except (httpx.HTTPError, ValueError):
        # Network failure or a malformed JSON body: use the static table.
        live_prices = {}

    if live_prices:
        prices = {**HF_JOBS_PRICE_USD_PER_HOUR, **live_prices}
        cached_at = now
    else:
        prices = dict(HF_JOBS_PRICE_USD_PER_HOUR)
        # Backdate the timestamp so the freshness check above expires this
        # entry after the fallback TTL instead of the full TTL, keeping the
        # cache shape (timestamp, prices) unchanged.
        cached_at = now - max(0.0, JOBS_PRICE_CACHE_TTL_S - _JOBS_PRICE_FALLBACK_TTL_S)

    _jobs_price_cache = (cached_at, prices)
    return dict(prices)
async def estimate_hf_job_cost(args: dict[str, Any]) -> CostEstimate:
    """Estimate an HF job's cost as hourly price times its timeout.

    The flavor is the first truthy value among ``hardware_flavor``,
    ``flavor``, and ``hardware`` (``"cpu-basic"`` otherwise). An unparseable
    timeout or an unpriced flavor produces an estimate with no cost and a
    ``block_reason``.
    """
    flavor_candidates = (
        args.get(key) for key in ("hardware_flavor", "flavor", "hardware")
    )
    flavor = str(next((found for found in flavor_candidates if found), "cpu-basic"))

    raw_timeout = args.get("timeout")
    hours = parse_timeout_hours(raw_timeout)
    if hours is None:
        return CostEstimate(
            estimated_cost_usd=None,
            billable=True,
            block_reason=f"Could not parse HF job timeout: {raw_timeout!r}.",
            label=flavor,
        )

    catalog = await hf_jobs_price_catalog()
    hourly_usd = catalog.get(flavor)
    if hourly_usd is None:
        return CostEstimate(
            estimated_cost_usd=None,
            billable=True,
            block_reason=f"No price is available for HF job hardware '{flavor}'.",
            label=flavor,
        )

    total_usd = round(hourly_usd * hours, 4)
    return CostEstimate(
        estimated_cost_usd=total_usd,
        billable=hourly_usd > 0,
        label=flavor,
    )
async def estimate_sandbox_cost(args: dict[str, Any], *, session: Any = None) -> CostEstimate:
    """Estimate the cost of creating a sandbox on the requested hardware.

    A session that already has a truthy ``sandbox`` attribute costs nothing.
    Otherwise the hardware's hourly price is charged for the default
    reservation period; unknown hardware yields an estimate with no cost and
    a ``block_reason``.
    """
    already_has_sandbox = session is not None and getattr(session, "sandbox", None)
    if already_has_sandbox:
        return CostEstimate(estimated_cost_usd=0.0, billable=False, label="existing")

    hardware = str(args.get("hardware") or "cpu-basic")
    hourly_usd = SPACE_PRICE_USD_PER_HOUR.get(hardware)
    if hourly_usd is None:
        return CostEstimate(
            estimated_cost_usd=None,
            billable=True,
            block_reason=f"No price is available for sandbox hardware '{hardware}'.",
            label=hardware,
        )

    reservation_usd = round(hourly_usd * DEFAULT_SANDBOX_RESERVATION_HOURS, 4)
    return CostEstimate(
        estimated_cost_usd=reservation_usd,
        billable=hourly_usd > 0,
        label=hardware,
    )
async def estimate_tool_cost(
    tool_name: str, args: dict[str, Any], *, session: Any = None
) -> CostEstimate:
    """Route a tool call to its cost estimator.

    ``hf_jobs`` and ``sandbox_create`` have dedicated estimators; every other
    tool is reported as free and non-billable.
    """
    if tool_name == "hf_jobs":
        return await estimate_hf_job_cost(args)
    if tool_name == "sandbox_create":
        return await estimate_sandbox_cost(args, session=session)
    return CostEstimate(estimated_cost_usd=0.0, billable=False)

agent/core/session.py CHANGED Viewed

@@ -120,6 +120,9 @@ class Session:
         self.notification_gateway = notification_gateway
         self.notification_destinations = list(notification_destinations or [])
         self.defer_turn_complete_notification = defer_turn_complete_notification
         # Session trajectory logging
         self.logged_events: list[dict] = []
@@ -313,6 +316,40 @@ class Session:
         self.config.model_name = model_name
         self.context_manager.model_max_tokens = _get_max_tokens_safe(model_name)
     def effective_effort_for(self, model_name: str) -> str | None:
         """Resolve the effort level to actually send for ``model_name``.

         self.notification_gateway = notification_gateway
         self.notification_destinations = list(notification_destinations or [])
         self.defer_turn_complete_notification = defer_turn_complete_notification
+        self.auto_approval_enabled: bool = False
+        self.auto_approval_cost_cap_usd: float | None = None
+        self.auto_approval_estimated_spend_usd: float = 0.0
         # Session trajectory logging
         self.logged_events: list[dict] = []
         self.config.model_name = model_name
         self.context_manager.model_max_tokens = _get_max_tokens_safe(model_name)
+    def set_auto_approval_policy(
+        self, *, enabled: bool, cost_cap_usd: float | None
+    ) -> None:
+        """Record the auto-approval switch and cost cap on this session.
+
+        ``enabled`` is coerced to a real ``bool``; ``cost_cap_usd`` is stored
+        exactly as given (including ``None``).
+        """
+        self.auto_approval_cost_cap_usd = cost_cap_usd
+        self.auto_approval_enabled = True if enabled else False
+    def add_auto_approval_estimated_spend(self, amount_usd: float | None) -> None:
+        """Add ``amount_usd`` to the running estimated-spend total.
+
+        ``None`` and amounts less than or equal to zero are ignored. The new
+        total is rounded to four decimal places.
+        """
+        if amount_usd is not None and not amount_usd <= 0:
+            new_total = self.auto_approval_estimated_spend_usd + float(amount_usd)
+            self.auto_approval_estimated_spend_usd = round(new_total, 4)
+    @property
+    def auto_approval_remaining_usd(self) -> float | None:
+        """Budget left under the cost cap, or ``None`` when no cap is set.
+
+        The value is the cap minus the estimated spend, floored at ``0.0`` and
+        rounded to four decimal places.
+        """
+        cap = self.auto_approval_cost_cap_usd
+        if cap is None:
+            return None
+        headroom = cap - self.auto_approval_estimated_spend_usd
+        return round(max(0.0, headroom), 4)
+    def auto_approval_policy_summary(self) -> dict[str, Any]:
+        """Return the auto-approval state as a plain dict.
+
+        Keys: ``enabled``, ``cost_cap_usd``, ``estimated_spend_usd`` (rounded
+        to four decimals) and ``remaining_usd``.
+        """
+        summary: dict[str, Any] = {}
+        summary["enabled"] = self.auto_approval_enabled
+        summary["cost_cap_usd"] = self.auto_approval_cost_cap_usd
+        summary["estimated_spend_usd"] = round(
+            self.auto_approval_estimated_spend_usd, 4
+        )
+        summary["remaining_usd"] = self.auto_approval_remaining_usd
+        return summary
     def effective_effort_for(self, model_name: str) -> str | None:
         """Resolve the effort level to actually send for ``model_name``.

agent/core/session_persistence.py CHANGED Viewed

@@ -176,6 +176,9 @@ class MongoSessionStore(NoopSessionStore):
         pending_approval: list[dict[str, Any]] | None = None,
         claude_counted: bool = False,
         notification_destinations: list[str] | None = None,
     ) -> None:
         if not self._ready():
             return
@@ -204,6 +207,9 @@ class MongoSessionStore(NoopSessionStore):
                     "pending_approval": pending_approval or [],
                     "claude_counted": claude_counted,
                     "notification_destinations": notification_destinations or [],
                 },
             },
             upsert=True,
@@ -224,6 +230,9 @@ class MongoSessionStore(NoopSessionStore):
         claude_counted: bool = False,
         created_at: datetime | None = None,
         notification_destinations: list[str] | None = None,
     ) -> None:
         if not self._ready():
             return
@@ -241,6 +250,9 @@ class MongoSessionStore(NoopSessionStore):
             pending_approval=pending_approval,
             claude_counted=claude_counted,
             notification_destinations=notification_destinations,
         )
         ops: list[Any] = []
         for idx, raw in enumerate(messages):

         pending_approval: list[dict[str, Any]] | None = None,
         claude_counted: bool = False,
         notification_destinations: list[str] | None = None,
+        auto_approval_enabled: bool = False,
+        auto_approval_cost_cap_usd: float | None = None,
+        auto_approval_estimated_spend_usd: float = 0.0,
     ) -> None:
         if not self._ready():
             return
                     "pending_approval": pending_approval or [],
                     "claude_counted": claude_counted,
                     "notification_destinations": notification_destinations or [],
+                    "auto_approval_enabled": auto_approval_enabled,
+                    "auto_approval_cost_cap_usd": auto_approval_cost_cap_usd,
+                    "auto_approval_estimated_spend_usd": auto_approval_estimated_spend_usd,
                 },
             },
             upsert=True,
         claude_counted: bool = False,
         created_at: datetime | None = None,
         notification_destinations: list[str] | None = None,
+        auto_approval_enabled: bool = False,
+        auto_approval_cost_cap_usd: float | None = None,
+        auto_approval_estimated_spend_usd: float = 0.0,
     ) -> None:
         if not self._ready():
             return
             pending_approval=pending_approval,
             claude_counted=claude_counted,
             notification_destinations=notification_destinations,
+            auto_approval_enabled=auto_approval_enabled,
+            auto_approval_cost_cap_usd=auto_approval_cost_cap_usd,
+            auto_approval_estimated_spend_usd=auto_approval_estimated_spend_usd,
         )
         ops: list[Any] = []
         for idx, raw in enumerate(messages):

agent/main.py CHANGED Viewed

@@ -21,6 +21,7 @@ import litellm
 from prompt_toolkit import PromptSession
 from agent.config import load_config
 from agent.core.agent_loop import submission_loop
 from agent.core import model_switcher
 from agent.core.hf_tokens import resolve_hf_token
@@ -55,6 +56,20 @@ litellm.suppress_debug_info = True
 CLI_CONFIG_PATH = Path(__file__).parent.parent / "configs" / "cli_agent_config.json"
 def _configure_runtime_logging() -> None:
     """Keep third-party warning spam from punching through the interactive UI."""
     import logging
@@ -375,8 +390,11 @@ async def event_listener(
                 tools_data = event.data.get("tools", []) if event.data else []
                 count = event.data.get("count", 0) if event.data else 0
-                # If yolo mode is active, auto-approve everything
-                if config and config.yolo_mode:
                     approvals = [
                         {
                             "tool_call_id": t.get("tool_call_id", ""),
@@ -1293,14 +1311,18 @@ async def headless_main(
             else:
                 print_tool_log(tool, log)
         elif event.event_type == "approval_required":
-            # Auto-approve everything in headless mode (safety net if yolo_mode
-            # didn't prevent the approval event for some reason)
             tools_data = event.data.get("tools", []) if event.data else []
             approvals = [
                 {
                     "tool_call_id": t.get("tool_call_id", ""),
-                    "approved": True,
-                    "feedback": None,
                 }
                 for t in tools_data
             ]

 from prompt_toolkit import PromptSession
 from agent.config import load_config
+from agent.core.approval_policy import is_scheduled_operation
 from agent.core.agent_loop import submission_loop
 from agent.core import model_switcher
 from agent.core.hf_tokens import resolve_hf_token
 CLI_CONFIG_PATH = Path(__file__).parent.parent / "configs" / "cli_agent_config.json"
+def _is_scheduled_hf_job_tool(tool_info: dict[str, Any]) -> bool:
+    """Tell whether a pending tool call is a scheduled ``hf_jobs`` operation.
+
+    ``tool_info["arguments"]`` may be a dict or a JSON-encoded string. Any
+    other tool, undecodable JSON, or non-dict arguments gives ``False``;
+    otherwise the decision is delegated to ``is_scheduled_operation`` on the
+    ``operation`` argument.
+    """
+    if tool_info.get("tool") != "hf_jobs":
+        return False
+    raw_args = tool_info.get("arguments") or {}
+    if isinstance(raw_args, str):
+        # Arguments can arrive still serialized; decode before inspecting.
+        try:
+            raw_args = json.loads(raw_args)
+        except json.JSONDecodeError:
+            return False
+    if isinstance(raw_args, dict):
+        return is_scheduled_operation(raw_args.get("operation"))
+    return False
 def _configure_runtime_logging() -> None:
     """Keep third-party warning spam from punching through the interactive UI."""
     import logging
                 tools_data = event.data.get("tools", []) if event.data else []
                 count = event.data.get("count", 0) if event.data else 0
+                # If yolo mode is active, auto-approve everything except
+                # scheduled HF jobs, whose recurring cost stays manual.
+                if config and config.yolo_mode and not any(
+                    _is_scheduled_hf_job_tool(t) for t in tools_data
+                ):
                     approvals = [
                         {
                             "tool_call_id": t.get("tool_call_id", ""),
             else:
                 print_tool_log(tool, log)
         elif event.event_type == "approval_required":
+            # Auto-approve in headless mode, except scheduled HF jobs. Those
+            # are rejected because their recurring cost needs manual approval.
             tools_data = event.data.get("tools", []) if event.data else []
             approvals = [
                 {
                     "tool_call_id": t.get("tool_call_id", ""),
+                    "approved": not _is_scheduled_hf_job_tool(t),
+                    "feedback": (
+                        "Scheduled HF jobs require manual approval."
+                        if _is_scheduled_hf_job_tool(t)
+                        else None
+                    ),
                 }
                 for t in tools_data
             ]

backend/models.py CHANGED Viewed

@@ -76,6 +76,15 @@ class PendingApprovalTool(BaseModel):
     arguments: dict[str, Any] = {}
 class SessionInfo(BaseModel):
     """Session metadata."""
@@ -89,6 +98,9 @@ class SessionInfo(BaseModel):
     model: str | None = None
     title: str | None = None
     notification_destinations: list[str] = Field(default_factory=list)
 class SessionNotificationsRequest(BaseModel):
@@ -97,6 +109,13 @@ class SessionNotificationsRequest(BaseModel):
     destinations: list[str]
 class HealthResponse(BaseModel):
     """Health check response."""

     arguments: dict[str, Any] = {}
+class SessionAutoApprovalInfo(BaseModel):
+    """Per-session auto-approval budget state."""
+    # Whether auto-approval is switched on for the session.
+    enabled: bool = False
+    # Spending cap in USD; None means no cap is stored.
+    cost_cap_usd: float | None = None
+    # Running total of estimated spend, in USD.
+    estimated_spend_usd: float = 0.0
+    # Budget left under the cap, in USD; None when there is no cap.
+    remaining_usd: float | None = None
 class SessionInfo(BaseModel):
     """Session metadata."""
     model: str | None = None
     title: str | None = None
     notification_destinations: list[str] = Field(default_factory=list)
+    auto_approval: SessionAutoApprovalInfo = Field(
+        default_factory=SessionAutoApprovalInfo
+    )
 class SessionNotificationsRequest(BaseModel):
     destinations: list[str]
+class SessionYoloRequest(BaseModel):
+    """Update a session's auto-approval policy.
+
+    ``cost_cap_usd`` may be omitted, ``null``, or a finite non-negative number.
+    """
+    # Desired on/off state for session auto-approval.
+    enabled: bool
+    # ``ge=0`` alone still lets ``Infinity`` through (Python's JSON parser
+    # accepts it). An infinite cap cannot be serialized back out as JSON, so
+    # non-finite values are rejected at validation time.
+    cost_cap_usd: float | None = Field(default=None, ge=0, allow_inf_nan=False)
 class HealthResponse(BaseModel):
     """Health check response."""

backend/routes/agent.py CHANGED Viewed

@@ -26,6 +26,7 @@ from models import (
     SessionInfo,
     SessionNotificationsRequest,
     SessionResponse,
     SubmitRequest,
     TruncateRequest,
 )
@@ -498,6 +499,26 @@ async def set_session_notifications(
     }
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
     """Return the user's plan tier and today's premium-model quota state."""

     SessionInfo,
     SessionNotificationsRequest,
     SessionResponse,
+    SessionYoloRequest,
     SubmitRequest,
     TruncateRequest,
 )
     }
+@router.patch("/session/{session_id}/yolo")
+async def set_session_yolo(
+    session_id: str,
+    body: SessionYoloRequest,
+    user: dict = Depends(get_current_user),
+) -> dict:
+    """Update the session-scoped auto-approval policy.
+
+    Checks session access, forwards the requested policy to the session
+    manager, and returns the resulting summary merged with ``session_id``.
+    A ``ValueError`` from the manager is reported as HTTP 400.
+    """
+    await _check_session_access(session_id, user)
+    # Distinguish "cap omitted" from an explicit ``null`` in the request body.
+    cap_was_sent = "cost_cap_usd" in body.model_fields_set
+    try:
+        summary = await session_manager.update_session_auto_approval(
+            session_id,
+            enabled=body.enabled,
+            cost_cap_usd=body.cost_cap_usd,
+            cap_provided=cap_was_sent,
+        )
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    payload = {"session_id": session_id}
+    payload.update(summary)
+    return payload
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
     """Return the user's plan tier and today's premium-model quota state."""

backend/session_manager.py CHANGED Viewed

@@ -116,6 +116,7 @@ class SessionCapacityError(Exception):
 # and per-request overhead.
 MAX_SESSIONS: int = 200
 MAX_SESSIONS_PER_USER: int = 10
 class SessionManager:
@@ -297,6 +298,20 @@ class SessionManager:
             return "ended"
         return "idle"
     async def _start_agent_session(
         self,
         *,
@@ -370,6 +385,20 @@ class SessionManager:
                 notification_destinations=list(
                     agent_session.session.notification_destinations
                 ),
             )
         except Exception as e:
             logger.warning(
@@ -451,6 +480,14 @@ class SessionManager:
         self._restore_pending_approval(session, meta.get("pending_approval") or [])
         session.turn_count = int(meta.get("turn_count") or 0)
         created_at = meta.get("created_at")
         if not isinstance(created_at, datetime):
@@ -883,6 +920,43 @@ class SessionManager:
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
         return True
     def get_session_owner(self, session_id: str) -> str | None:
         """Get the user_id that owns a session, or None if session doesn't exist."""
         agent_session = self.sessions.get(session_id)
@@ -925,6 +999,7 @@ class SessionManager:
             "notification_destinations": list(
                 agent_session.session.notification_destinations
             ),
         }
     def set_notification_destinations(
@@ -991,6 +1066,25 @@ class SessionManager:
                         "model": row.get("model"),
                         "title": row.get("title"),
                         "notification_destinations": row.get("notification_destinations") or [],
                     }
                 )
             return results

 # and per-request overhead.
 MAX_SESSIONS: int = 200
 MAX_SESSIONS_PER_USER: int = 10
+DEFAULT_YOLO_COST_CAP_USD: float = 5.0
 class SessionManager:
             return "ended"
         return "idle"
+    @staticmethod
+    def _auto_approval_summary(session: Session) -> dict[str, Any]:
+        """Build the auto-approval summary dict for ``session``.
+
+        Uses the session's own ``auto_approval_policy_summary()`` when it has
+        one. Otherwise the summary is assembled from the individual
+        ``auto_approval_*`` attributes, treating missing ones as disabled,
+        uncapped and zero spend.
+        """
+        if hasattr(session, "auto_approval_policy_summary"):
+            return session.auto_approval_policy_summary()
+        cap = getattr(session, "auto_approval_cost_cap_usd", None)
+        raw_spend = getattr(session, "auto_approval_estimated_spend_usd", 0.0)
+        spend = float(raw_spend or 0.0)
+        if cap is None:
+            remaining = None
+        else:
+            remaining = round(max(0.0, float(cap) - spend), 4)
+        is_enabled = bool(getattr(session, "auto_approval_enabled", False))
+        return {
+            "enabled": is_enabled,
+            "cost_cap_usd": cap,
+            "estimated_spend_usd": round(spend, 4),
+            "remaining_usd": remaining,
+        }
     async def _start_agent_session(
         self,
         *,
                 notification_destinations=list(
                     agent_session.session.notification_destinations
                 ),
+                auto_approval_enabled=bool(
+                    getattr(agent_session.session, "auto_approval_enabled", False)
+                ),
+                auto_approval_cost_cap_usd=getattr(
+                    agent_session.session, "auto_approval_cost_cap_usd", None
+                ),
+                auto_approval_estimated_spend_usd=float(
+                    getattr(
+                        agent_session.session,
+                        "auto_approval_estimated_spend_usd",
+                        0.0,
+                    )
+                    or 0.0
+                ),
             )
         except Exception as e:
             logger.warning(
         self._restore_pending_approval(session, meta.get("pending_approval") or [])
         session.turn_count = int(meta.get("turn_count") or 0)
+        session.auto_approval_enabled = bool(meta.get("auto_approval_enabled", False))
+        raw_cap = meta.get("auto_approval_cost_cap_usd")
+        session.auto_approval_cost_cap_usd = (
+            float(raw_cap) if isinstance(raw_cap, int | float) else None
+        )
+        session.auto_approval_estimated_spend_usd = float(
+            meta.get("auto_approval_estimated_spend_usd") or 0.0
+        )
         created_at = meta.get("created_at")
         if not isinstance(created_at, datetime):
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
         return True
+    async def update_session_auto_approval(
+        self,
+        session_id: str,
+        *,
+        enabled: bool,
+        cost_cap_usd: float | None,
+        cap_provided: bool = False,
+    ) -> dict[str, Any]:
+        """Apply a new auto-approval policy to an active session and persist it.
+
+        Cap resolution:
+          * enabling with a non-``None`` cap uses that cap;
+          * enabling with no cap reuses the session's stored cap when the
+            caller did not explicitly provide one, else falls back to
+            ``DEFAULT_YOLO_COST_CAP_USD``;
+          * disabling keeps the stored cap unless the caller explicitly
+            provided one.
+
+        Returns the resulting auto-approval summary dict.
+
+        Raises:
+            ValueError: if the session is unknown or not active.
+        """
+        agent_session = self.sessions.get(session_id)
+        if not agent_session or not agent_session.is_active:
+            raise ValueError("Session not found or inactive")
+        session = agent_session.session
+        stored_cap = getattr(session, "auto_approval_cost_cap_usd", None)
+        if enabled:
+            if cost_cap_usd is None:
+                reuse_stored = not cap_provided and stored_cap is not None
+                cost_cap_usd = stored_cap if reuse_stored else DEFAULT_YOLO_COST_CAP_USD
+        elif not cap_provided:
+            cost_cap_usd = stored_cap
+        apply_policy = getattr(session, "set_auto_approval_policy", None)
+        if hasattr(session, "set_auto_approval_policy"):
+            apply_policy(enabled=enabled, cost_cap_usd=cost_cap_usd)
+        else:
+            # Session objects without the setter get the raw attributes.
+            session.auto_approval_enabled = bool(enabled)
+            session.auto_approval_cost_cap_usd = cost_cap_usd
+        await self.persist_session_snapshot(agent_session)
+        return self._auto_approval_summary(session)
     def get_session_owner(self, session_id: str) -> str | None:
         """Get the user_id that owns a session, or None if session doesn't exist."""
         agent_session = self.sessions.get(session_id)
             "notification_destinations": list(
                 agent_session.session.notification_destinations
             ),
+            "auto_approval": self._auto_approval_summary(agent_session.session),
         }
     def set_notification_destinations(
                         "model": row.get("model"),
                         "title": row.get("title"),
                         "notification_destinations": row.get("notification_destinations") or [],
+                        "auto_approval": {
+                            "enabled": bool(row.get("auto_approval_enabled", False)),
+                            "cost_cap_usd": row.get("auto_approval_cost_cap_usd"),
+                            "estimated_spend_usd": float(
+                                row.get("auto_approval_estimated_spend_usd") or 0.0
+                            ),
+                            "remaining_usd": (
+                                None
+                                if row.get("auto_approval_cost_cap_usd") is None
+                                else round(
+                                    max(
+                                        0.0,
+                                        float(row.get("auto_approval_cost_cap_usd") or 0.0)
+                                        - float(row.get("auto_approval_estimated_spend_usd") or 0.0),
+                                    ),
+                                    4,
+                                )
+                            ),
+                        },
                     }
                 )
             return results

frontend/src/components/Chat/ToolCallGroup.tsx CHANGED Viewed

@@ -1,5 +1,5 @@
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
-import { Box, Stack, Typography, Chip, Button, TextField, IconButton, Link, CircularProgress } from '@mui/material';
 import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline';
 import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';
 import OpenInNewIcon from '@mui/icons-material/OpenInNew';
@@ -502,6 +502,7 @@ function InlineApproval({
 }) {
   const [feedback, setFeedback] = useState('');
   const args = input as Record<string, unknown> | undefined;
   const { setPanel, getEditedScript } = useAgentStore();
   const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
   const hasEditedScript = !!getEditedScript(toolCallId);
@@ -521,6 +522,24 @@ function InlineApproval({
   return (
     <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
       {toolName === 'sandbox_create' && args && (() => {
         const hw = String(args.hardware || 'cpu-basic');
         const cost = costLabel(hw);

 import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
+import { Alert, Box, Stack, Typography, Chip, Button, TextField, IconButton, Link, CircularProgress } from '@mui/material';
 import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline';
 import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';
 import OpenInNewIcon from '@mui/icons-material/OpenInNew';
 }) {
   const [feedback, setFeedback] = useState('');
   const args = input as Record<string, unknown> | undefined;
+  const autoApproval = useAgentStore((state) => state.budgetBlocks[toolCallId]);
   const { setPanel, getEditedScript } = useAgentStore();
   const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
   const hasEditedScript = !!getEditedScript(toolCallId);
   return (
     <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
+      {autoApproval && (
+        <Alert
+          severity="warning"
+          sx={{
+            mb: 1.5,
+            py: 0.5,
+            bgcolor: 'rgba(245,158,11,0.08)',
+            border: '1px solid rgba(245,158,11,0.18)',
+            color: 'var(--text)',
+            '& .MuiAlert-icon': { color: 'var(--accent-yellow)' },
+          }}
+        >
+          <Typography variant="body2" sx={{ fontSize: '0.72rem' }}>
+            YOLO paused: {autoApproval.reason || 'manual approval required.'}
+          </Typography>
+        </Alert>
+      )}
       {toolName === 'sandbox_create' && args && (() => {
         const hw = String(args.hardware || 'cpu-basic');
         const cost = costLabel(hw);

frontend/src/components/Layout/AppLayout.tsx CHANGED Viewed

@@ -24,6 +24,7 @@ import SessionSidebar from '@/components/SessionSidebar/SessionSidebar';
 import SessionChat from '@/components/SessionChat';
 import CodePanel from '@/components/CodePanel/CodePanel';
 import WelcomeScreen from '@/components/WelcomeScreen/WelcomeScreen';
 import { apiFetch } from '@/utils/api';
 const DRAWER_WIDTH = 260;
@@ -252,6 +253,7 @@ export default function AppLayout() {
           </Box>
           <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
             <IconButton
               onClick={toggleTheme}
               size="small"

 import SessionChat from '@/components/SessionChat';
 import CodePanel from '@/components/CodePanel/CodePanel';
 import WelcomeScreen from '@/components/WelcomeScreen/WelcomeScreen';
+import YoloControl from '@/components/YoloControl';
 import { apiFetch } from '@/utils/api';
 const DRAWER_WIDTH = 260;
           </Box>
           <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+            <YoloControl />
             <IconButton
               onClick={toggleTheme}
               size="small"

frontend/src/components/YoloControl.tsx ADDED Viewed

	@@ -0,0 +1,155 @@

+import { useEffect, useMemo, useState } from 'react';
+import {
+  Button,
+  Dialog,
+  DialogActions,
+  DialogContent,
+  DialogTitle,
+  TextField,
+  Tooltip,
+  Typography,
+} from '@mui/material';
+import BoltOutlinedIcon from '@mui/icons-material/BoltOutlined';
+import { useSessionStore } from '@/store/sessionStore';
+import { apiFetch } from '@/utils/api';
+const DEFAULT_CAP_USD = 5;
+/**
+ * Format a USD amount for display.
+ * null/undefined render as "uncapped"; amounts of 100 or more are shown
+ * without decimals; smaller amounts use two decimals with a trailing ".00"
+ * dropped.
+ */
+function money(value: number | null | undefined): string {
+  if (value == null) return 'uncapped';
+  const digits = value >= 100 ? 0 : 2;
+  const text = value.toFixed(digits).replace(/\.00$/, '');
+  return `$${text}`;
+}
+/**
+ * Toolbar button plus budget dialog for the active session's YOLO
+ * auto-approval policy.
+ *
+ * Session state is read from `useSessionStore`. Changes are sent with a PATCH
+ * to `/api/session/{id}/yolo` and the response is written back to the store
+ * through `updateSessionYolo`.
+ */
+export default function YoloControl() {
+  const { sessions, activeSessionId, updateSessionYolo } = useSessionStore();
+  const activeSession = useMemo(
+    () => sessions.find((s) => s.id === activeSessionId) || null,
+    [sessions, activeSessionId],
+  );
+  const [dialogOpen, setDialogOpen] = useState(false);
+  const [capInput, setCapInput] = useState(String(DEFAULT_CAP_USD));
+  const [busy, setBusy] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const enabled = Boolean(activeSession?.autoApprovalEnabled);
+  // The button is inert without an active session, for an expired session,
+  // or while a request is in flight.
+  const disabled = !activeSessionId || activeSession?.expired || busy;
+  const remaining = activeSession?.autoApprovalRemainingUsd ?? null;
+  const cap = activeSession?.autoApprovalCostCapUsd ?? null;
+  // Re-seed the cap text field whenever the active session or its stored cap
+  // changes, falling back to the default cap when none is stored.
+  useEffect(() => {
+    if (!activeSession) return;
+    setCapInput(String(activeSession.autoApprovalCostCapUsd ?? DEFAULT_CAP_USD));
+  }, [activeSession?.id, activeSession?.autoApprovalCostCapUsd]); // eslint-disable-line react-hooks/exhaustive-deps
+  // Send the policy to the backend. `cost_cap_usd` is included in the body
+  // only when `nextCap` is given. Resolves to the parsed response, or to null
+  // when there is no active session or the request fails (a generic error
+  // message is stored in that case).
+  async function patchPolicy(nextEnabled: boolean, nextCap?: number) {
+    if (!activeSessionId) return null;
+    setBusy(true);
+    setError(null);
+    try {
+      const body: Record<string, unknown> = { enabled: nextEnabled };
+      if (nextCap !== undefined) body.cost_cap_usd = nextCap;
+      const response = await apiFetch(`/api/session/${activeSessionId}/yolo`, {
+        method: 'PATCH',
+        body: JSON.stringify(body),
+      });
+      if (!response.ok) {
+        throw new Error(await response.text());
+      }
+      const data = await response.json();
+      updateSessionYolo(activeSessionId, data);
+      return data;
+    } catch {
+      setError('Could not update YOLO settings.');
+      return null;
+    } finally {
+      setBusy(false);
+    }
+  }
+  // Turning off sends only `enabled: false`. Turning on sends the current cap
+  // (or the default) and, on success, opens the budget dialog.
+  const handleToggle = async () => {
+    if (disabled) return;
+    if (enabled) {
+      await patchPolicy(false);
+      return;
+    }
+    const nextCap = cap ?? DEFAULT_CAP_USD;
+    const updated = await patchPolicy(true, nextCap);
+    if (updated) {
+      setCapInput(String(updated.cost_cap_usd ?? nextCap));
+      setDialogOpen(true);
+    }
+  };
+  // Validate the typed cap as a finite, non-negative number before saving;
+  // the dialog closes only when the update succeeds.
+  const handleSaveCap = async () => {
+    const parsed = Number(capInput);
+    if (!Number.isFinite(parsed) || parsed < 0) {
+      setError('Enter a non-negative dollar amount.');
+      return;
+    }
+    const updated = await patchPolicy(true, parsed);
+    if (updated) setDialogOpen(false);
+  };
+  return (
+    <>
+      <Tooltip title={enabled ? 'Disable session YOLO auto-approval' : 'Enable session YOLO auto-approval'}>
+        <span>
+          <Button
+            size="small"
+            variant={enabled ? 'contained' : 'outlined'}
+            disabled={disabled}
+            onClick={handleToggle}
+            startIcon={<BoltOutlinedIcon sx={{ fontSize: 16 }} />}
+            sx={{
+              minWidth: { xs: 74, md: 116 },
+              height: 32,
+              px: { xs: 1, md: 1.25 },
+              borderRadius: '8px',
+              textTransform: 'none',
+              fontSize: '0.72rem',
+              whiteSpace: 'nowrap',
+              bgcolor: enabled ? 'var(--accent-yellow)' : 'transparent',
+              color: enabled ? '#111' : 'text.secondary',
+              borderColor: enabled ? 'var(--accent-yellow)' : 'divider',
+              '&:hover': {
+                bgcolor: enabled ? 'var(--accent-yellow)' : 'action.hover',
+                borderColor: 'var(--accent-yellow)',
+              },
+            }}
+          >
+            {enabled ? `YOLO ${money(remaining)}` : 'YOLO'}
+          </Button>
+        </span>
+      </Tooltip>
+      <Dialog open={dialogOpen} onClose={() => setDialogOpen(false)} maxWidth="xs" fullWidth>
+        <DialogTitle sx={{ pb: 1 }}>YOLO Budget</DialogTitle>
+        <DialogContent sx={{ display: 'flex', flexDirection: 'column', gap: 1.5, pt: 1 }}>
+          <Typography variant="body2" color="text.secondary">
+            Auto-approval is active for this session. Scheduled HF jobs still require approval.
+          </Typography>
+          <TextField
+            autoFocus
+            label="Session cap (USD)"
+            type="number"
+            size="small"
+            value={capInput}
+            onChange={(e) => setCapInput(e.target.value)}
+            inputProps={{ min: 0, step: 0.5 }}
+            error={Boolean(error)}
+            helperText={error || `Estimated spend: ${money(activeSession?.autoApprovalEstimatedSpendUsd ?? 0)} of ${money(cap)}`}
+          />
+        </DialogContent>
+        <DialogActions>
+          <Button onClick={() => setDialogOpen(false)} sx={{ textTransform: 'none' }}>
+            Close
+          </Button>
+          <Button onClick={handleSaveCap} disabled={busy} variant="contained" sx={{ textTransform: 'none' }}>
+            Save
+          </Button>
+        </DialogActions>
+      </Dialog>
+    </>
+  );
+}

frontend/src/hooks/useAgentChat.ts CHANGED Viewed

@@ -36,7 +36,7 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
   const isActiveRef = useRef(isActive);
   isActiveRef.current = isActive;
-  const { setNeedsAttention } = useSessionStore();
   // Helper: update this session's state (mirrors to globals if active)
   const updateSession = useAgentStore.getState().updateSession;
@@ -186,6 +186,20 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
         if (!tools.length) return;
         setNeedsAttention(sessionId, true);
         updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
         // Build panel data for this session's pending approval
@@ -480,6 +494,9 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
             );
             if (pendingIds.size > 0) setNeedsAttention(sessionId, true);
           }
           return { data, pendingIds, info };
         }
         return { data, pendingIds, info: null };
@@ -562,7 +579,15 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
             return true;
           } else if (et === 'approval_required') {
             sideChannel.onApprovalRequired(
-              (event.data?.tools || []) as Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>,
             );
             stopReconnect();
             const result = await hydrateMessages();

   const isActiveRef = useRef(isActive);
   isActiveRef.current = isActive;
+  const { setNeedsAttention, updateSessionYolo } = useSessionStore();
   // Helper: update this session's state (mirrors to globals if active)
   const updateSession = useAgentStore.getState().updateSession;
         if (!tools.length) return;
         setNeedsAttention(sessionId, true);
+        const store = useAgentStore.getState();
+        for (const tool of tools) {
+          store.setToolBudgetBlock(
+            tool.tool_call_id,
+            tool.auto_approval_blocked
+              ? {
+                  reason: tool.block_reason ?? null,
+                  estimatedCostUsd: tool.estimated_cost_usd ?? null,
+                  remainingCapUsd: tool.remaining_cap_usd ?? null,
+                }
+              : null,
+          );
+        }
         updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
         // Build panel data for this session's pending approval
             );
             if (pendingIds.size > 0) setNeedsAttention(sessionId, true);
           }
+          if (info.auto_approval) {
+            updateSessionYolo(sessionId, info.auto_approval);
+          }
           return { data, pendingIds, info };
         }
         return { data, pendingIds, info: null };
             return true;
           } else if (et === 'approval_required') {
             sideChannel.onApprovalRequired(
+              (event.data?.tools || []) as Array<{
+                tool: string;
+                arguments: Record<string, unknown>;
+                tool_call_id: string;
+                auto_approval_blocked?: boolean;
+                block_reason?: string | null;
+                estimated_cost_usd?: number | null;
+                remaining_cap_usd?: number | null;
+              }>,
             );
             stopReconnect();
             const result = await hydrateMessages();

frontend/src/lib/sse-chat-transport.ts CHANGED Viewed

@@ -26,7 +26,15 @@ export interface SideChannelCallbacks {
   onToolLog: (tool: string, log: string, agentId?: string, label?: string) => void;
   onConnectionChange: (connected: boolean) => void;
   onSessionDead: (sessionId: string) => void;
-  onApprovalRequired: (tools: Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>) => void;
   onToolCallPanel: (tool: string, args: Record<string, unknown>) => void;
   onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;
   onStreaming: () => void;
@@ -236,6 +244,10 @@ function createEventToChunkStream(sideChannel: SideChannelCallbacks): TransformS
             tool: string;
             arguments: Record<string, unknown>;
             tool_call_id: string;
           }>;
           if (!tools) break;

   onToolLog: (tool: string, log: string, agentId?: string, label?: string) => void;
   onConnectionChange: (connected: boolean) => void;
   onSessionDead: (sessionId: string) => void;
+  onApprovalRequired: (tools: Array<{
+    tool: string;
+    arguments: Record<string, unknown>;
+    tool_call_id: string;
+    auto_approval_blocked?: boolean;
+    block_reason?: string | null;
+    estimated_cost_usd?: number | null;
+    remaining_cap_usd?: number | null;
+  }>) => void;
   onToolCallPanel: (tool: string, args: Record<string, unknown>) => void;
   onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;
   onStreaming: () => void;
             tool: string;
             arguments: Record<string, unknown>;
             tool_call_id: string;
+            auto_approval_blocked?: boolean;
+            block_reason?: string | null;
+            estimated_cost_usd?: number | null;
+            remaining_cap_usd?: number | null;
           }>;
           if (!tools) break;

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -50,6 +50,12 @@ export interface JobsUpgradeState {
   namespace?: string | null;
 }
 export type ActivityStatus =
   | { type: 'idle' }
   | { type: 'thinking' }
@@ -145,6 +151,9 @@ interface AgentStore {
   // Tool rejected states (tool_call_id -> true if rejected by user) - persisted across renders
   rejectedTools: Record<string, boolean>;
   // ── Per-session actions ─────────────────────────────────────────────
   /** Update a session's state. If it's the active session, also update flat state. */
@@ -196,6 +205,9 @@ interface AgentStore {
   setToolRejected: (toolCallId: string, isRejected: boolean) => void;
   getToolRejected: (toolCallId: string) => boolean | undefined;
 }
 /**
@@ -300,6 +312,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
   trackioDashboards: loadTrackioDashboards(),
   toolErrors: loadToolErrors(),
   rejectedTools: loadRejectedTools(),
   // ── Per-session state management ──────────────────────────────────
@@ -529,4 +542,24 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
   },
   getToolRejected: (toolCallId) => get().rejectedTools[toolCallId],
 }));

   namespace?: string | null;
 }
+export interface ToolBudgetBlockState {
+  reason?: string | null;
+  estimatedCostUsd?: number | null;
+  remainingCapUsd?: number | null;
+}
 export type ActivityStatus =
   | { type: 'idle' }
   | { type: 'thinking' }
   // Tool rejected states (tool_call_id -> true if rejected by user) - persisted across renders
   rejectedTools: Record<string, boolean>;
+  // Tool budget-block metadata (tool_call_id -> display metadata) - transient UI state
+  budgetBlocks: Record<string, ToolBudgetBlockState>;
   // ── Per-session actions ─────────────────────────────────────────────
   /** Update a session's state. If it's the active session, also update flat state. */
   setToolRejected: (toolCallId: string, isRejected: boolean) => void;
   getToolRejected: (toolCallId: string) => boolean | undefined;
+  setToolBudgetBlock: (toolCallId: string, block: ToolBudgetBlockState | null) => void;
+  getToolBudgetBlock: (toolCallId: string) => ToolBudgetBlockState | undefined;
 }
 /**
   trackioDashboards: loadTrackioDashboards(),
   toolErrors: loadToolErrors(),
   rejectedTools: loadRejectedTools(),
+  budgetBlocks: {},
   // ── Per-session state management ──────────────────────────────────
   },
   getToolRejected: (toolCallId) => get().rejectedTools[toolCallId],
+  // ── Tool Budget Blocks ───────────────────────────────────────────────
+  setToolBudgetBlock: (toolCallId, block) => {
+    set((state) => {
+      if (!block) {
+        const next = { ...state.budgetBlocks };
+        delete next[toolCallId];
+        return { budgetBlocks: next };
+      }
+      return {
+        budgetBlocks: {
+          ...state.budgetBlocks,
+          [toolCallId]: block,
+        },
+      };
+    });
+  },
+  getToolBudgetBlock: (toolCallId) => get().budgetBlocks[toolCallId],
 }));

frontend/src/store/sessionStore.ts CHANGED Viewed

@@ -27,7 +27,19 @@ interface SessionStore {
     created_at: string;
     is_active?: boolean;
     pending_approval?: unknown[] | null;
   }>) => void;
   /** Atomically swap a session's id in the list + both localStorage caches.
    *  Used when we rehydrate an expired session into a freshly-created backend
    *  session — preserves title, timestamps, and messages. */
@@ -47,6 +59,10 @@ export const useSessionStore = create<SessionStore>()(
           createdAt: new Date().toISOString(),
           isActive: true,
           needsAttention: false,
         };
         set((state) => ({
           sessions: [...state.sessions, newSession],
@@ -93,12 +109,21 @@ export const useSessionStore = create<SessionStore>()(
             if (!id) continue;
             const existing = byId.get(id);
             if (existing) {
               const updated = {
                 ...existing,
                 title: server.title || existing.title,
                 isActive: server.is_active ?? existing.isActive,
                 needsAttention: Boolean(server.pending_approval?.length) || existing.needsAttention,
                 expired: false,
               };
               const idx = merged.findIndex((s) => s.id === id);
               if (idx >= 0) merged[idx] = updated;
@@ -112,6 +137,10 @@ export const useSessionStore = create<SessionStore>()(
               isActive: server.is_active ?? true,
               needsAttention: Boolean(server.pending_approval?.length),
               expired: false,
             };
             merged.push(newSession);
             byId.set(id, newSession);
@@ -123,6 +152,22 @@ export const useSessionStore = create<SessionStore>()(
         });
       },
       renameSession: (oldId: string, newId: string) => {
         if (oldId === newId) return;
         moveMessages(oldId, newId);

     created_at: string;
     is_active?: boolean;
     pending_approval?: unknown[] | null;
+    auto_approval?: {
+      enabled?: boolean;
+      cost_cap_usd?: number | null;
+      estimated_spend_usd?: number;
+      remaining_usd?: number | null;
+    } | null;
   }>) => void;
+  updateSessionYolo: (id: string, policy: {
+    enabled: boolean;
+    cost_cap_usd?: number | null;
+    estimated_spend_usd?: number;
+    remaining_usd?: number | null;
+  }) => void;
   /** Atomically swap a session's id in the list + both localStorage caches.
    *  Used when we rehydrate an expired session into a freshly-created backend
    *  session — preserves title, timestamps, and messages. */
           createdAt: new Date().toISOString(),
           isActive: true,
           needsAttention: false,
+          autoApprovalEnabled: false,
+          autoApprovalCostCapUsd: null,
+          autoApprovalEstimatedSpendUsd: 0,
+          autoApprovalRemainingUsd: null,
         };
         set((state) => ({
           sessions: [...state.sessions, newSession],
             if (!id) continue;
             const existing = byId.get(id);
             if (existing) {
+              const auto = server.auto_approval;
               const updated = {
                 ...existing,
                 title: server.title || existing.title,
                 isActive: server.is_active ?? existing.isActive,
                 needsAttention: Boolean(server.pending_approval?.length) || existing.needsAttention,
                 expired: false,
+                ...(auto
+                  ? {
+                      autoApprovalEnabled: Boolean(auto.enabled),
+                      autoApprovalCostCapUsd: auto.cost_cap_usd ?? null,
+                      autoApprovalEstimatedSpendUsd: auto.estimated_spend_usd ?? 0,
+                      autoApprovalRemainingUsd: auto.remaining_usd ?? null,
+                    }
+                  : {}),
               };
               const idx = merged.findIndex((s) => s.id === id);
               if (idx >= 0) merged[idx] = updated;
               isActive: server.is_active ?? true,
               needsAttention: Boolean(server.pending_approval?.length),
               expired: false,
+              autoApprovalEnabled: Boolean(server.auto_approval?.enabled),
+              autoApprovalCostCapUsd: server.auto_approval?.cost_cap_usd ?? null,
+              autoApprovalEstimatedSpendUsd: server.auto_approval?.estimated_spend_usd ?? 0,
+              autoApprovalRemainingUsd: server.auto_approval?.remaining_usd ?? null,
             };
             merged.push(newSession);
             byId.set(id, newSession);
         });
       },
+      updateSessionYolo: (id, policy) => {
+        set((state) => ({
+          sessions: state.sessions.map((s) =>
+            s.id === id
+              ? {
+                  ...s,
+                  autoApprovalEnabled: policy.enabled,
+                  autoApprovalCostCapUsd: policy.cost_cap_usd ?? null,
+                  autoApprovalEstimatedSpendUsd: policy.estimated_spend_usd ?? 0,
+                  autoApprovalRemainingUsd: policy.remaining_usd ?? null,
+                }
+              : s,
+          ),
+        }));
+      },
       renameSession: (oldId: string, newId: string) => {
         if (oldId === newId) return;
         moveMessages(oldId, newId);

frontend/src/types/agent.ts CHANGED Viewed

@@ -21,6 +21,10 @@ export interface SessionMeta {
    *  disables input until the user chooses to restore-with-summary or
    *  start fresh. */
   expired?: boolean;
 }
 export interface ToolApproval {

    *  disables input until the user chooses to restore-with-summary or
    *  start fresh. */
   expired?: boolean;
+  autoApprovalEnabled?: boolean;
+  autoApprovalCostCapUsd?: number | null;
+  autoApprovalEstimatedSpendUsd?: number;
+  autoApprovalRemainingUsd?: number | null;
 }
 export interface ToolApproval {

frontend/src/types/events.ts CHANGED Viewed

@@ -68,6 +68,10 @@ export interface ApprovalToolItem {
   tool: string;
   arguments: Record<string, unknown>;
   tool_call_id: string;
 }
 export interface TurnCompleteEventData {

   tool: string;
   arguments: Record<string, unknown>;
   tool_call_id: string;
+  auto_approval_blocked?: boolean;
+  block_reason?: string | null;
+  estimated_cost_usd?: number | null;
+  remaining_cap_usd?: number | null;
 }
 export interface TurnCompleteEventData {

tests/unit/test_agent_model_gating.py CHANGED Viewed

@@ -127,3 +127,48 @@ async def test_user_quota_response_uses_premium_fields_only(monkeypatch):
         "premium_daily_cap": 5,
         "premium_remaining": 3,
     }

         "premium_daily_cap": 5,
         "premium_remaining": 3,
     }
+@pytest.mark.asyncio
+async def test_set_session_yolo_calls_manager_with_cap_presence(monkeypatch):
+    async def fake_check_session_access(session_id, user, request=None):
+        assert session_id == "s1"
+        assert user["user_id"] == "u1"
+        return object()
+    calls = []
+    async def fake_update_session_auto_approval(session_id, **kwargs):
+        calls.append((session_id, kwargs))
+        return {
+            "enabled": kwargs["enabled"],
+            "cost_cap_usd": 7.5,
+            "estimated_spend_usd": 0.0,
+            "remaining_usd": 7.5,
+        }
+    monkeypatch.setattr(agent, "_check_session_access", fake_check_session_access)
+    monkeypatch.setattr(
+        agent.session_manager,
+        "update_session_auto_approval",
+        fake_update_session_auto_approval,
+    )
+    response = await agent.set_session_yolo(
+        "s1",
+        agent.SessionYoloRequest(enabled=True, cost_cap_usd=7.5),
+        {"user_id": "u1"},
+    )
+    assert response["enabled"] is True
+    assert response["remaining_usd"] == 7.5
+    assert calls == [
+        (
+            "s1",
+            {
+                "enabled": True,
+                "cost_cap_usd": 7.5,
+                "cap_provided": True,
+            },
+        )
+    ]

tests/unit/test_auto_approval_policy.py ADDED Viewed

	@@ -0,0 +1,185 @@

+from types import SimpleNamespace
+import pytest
+from agent.config import Config
+from agent.core import agent_loop
+from agent.core.cost_estimation import CostEstimate
+def _config(**overrides):
+    data = {
+        "model_name": "moonshotai/Kimi-K2.6",
+        "confirm_cpu_jobs": True,
+        "auto_file_upload": False,
+        "yolo_mode": False,
+        **overrides,
+    }
+    return Config.model_validate(data)
+def _session(*, cap=5.0, spent=0.0, enabled=True):
+    return SimpleNamespace(
+        config=_config(),
+        auto_approval_enabled=enabled,
+        auto_approval_cost_cap_usd=cap,
+        auto_approval_estimated_spend_usd=spent,
+        sandbox=None,
+    )
+@pytest.mark.asyncio
+async def test_session_yolo_auto_approves_non_costed_approval_tool():
+    decision = await agent_loop._approval_decision(
+        "hf_repo_files",
+        {"operation": "upload", "path": "README.md"},
+        _session(),
+    )
+    assert decision.requires_approval is False
+    assert decision.auto_approved is True
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "operation",
+    ["scheduled run", "scheduled uv", "scheduled  run"],
+)
+async def test_scheduled_hf_jobs_always_require_manual_approval(operation):
+    session = _session()
+    session.config.yolo_mode = True
+    decision = await agent_loop._approval_decision(
+        "hf_jobs",
+        {"operation": operation, "script": "print(1)"},
+        session,
+    )
+    assert decision.requires_approval is True
+    assert decision.auto_approval_blocked is True
+    assert "Scheduled HF jobs" in decision.block_reason
+    assert agent_loop._needs_approval("hf_jobs", {"operation": operation}, session.config)
+@pytest.mark.asyncio
+async def test_immediate_hf_job_under_cap_auto_runs(monkeypatch):
+    async def fake_estimate(*args, **kwargs):
+        return CostEstimate(estimated_cost_usd=2.0, billable=True)
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    decision = await agent_loop._approval_decision(
+        "hf_jobs",
+        {"operation": "run", "hardware_flavor": "a10g-large", "timeout": "1h"},
+        _session(cap=5.0, spent=1.0),
+    )
+    assert decision.requires_approval is False
+    assert decision.auto_approved is True
+    assert decision.estimated_cost_usd == 2.0
+@pytest.mark.asyncio
+async def test_immediate_hf_job_over_cap_falls_back_to_approval(monkeypatch):
+    async def fake_estimate(*args, **kwargs):
+        return CostEstimate(estimated_cost_usd=2.0, billable=True)
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    decision = await agent_loop._approval_decision(
+        "hf_jobs",
+        {"operation": "run", "hardware_flavor": "a10g-large", "timeout": "1h"},
+        _session(cap=5.0, spent=4.0),
+    )
+    assert decision.requires_approval is True
+    assert decision.auto_approval_blocked is True
+    assert "exceeds" in decision.block_reason
+    assert decision.remaining_cap_usd == 1.0
+@pytest.mark.asyncio
+async def test_unknown_cost_falls_back_to_approval(monkeypatch):
+    async def fake_estimate(*args, **kwargs):
+        return CostEstimate(
+            estimated_cost_usd=None,
+            billable=True,
+            block_reason="No price is available.",
+        )
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    decision = await agent_loop._approval_decision(
+        "sandbox_create",
+        {"hardware": "mystery-gpu"},
+        _session(),
+    )
+    assert decision.requires_approval is True
+    assert decision.auto_approval_blocked is True
+    assert decision.estimated_cost_usd is None
+@pytest.mark.asyncio
+async def test_batch_reservation_blocks_second_over_budget_job(monkeypatch):
+    async def fake_estimate(*args, **kwargs):
+        return CostEstimate(estimated_cost_usd=3.0, billable=True)
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    session = _session(cap=5.0, spent=0.0)
+    first = await agent_loop._approval_decision(
+        "hf_jobs",
+        {"operation": "run", "hardware_flavor": "a10g-large"},
+        session,
+        reserved_spend_usd=0.0,
+    )
+    second = await agent_loop._approval_decision(
+        "hf_jobs",
+        {"operation": "run", "hardware_flavor": "a10g-large"},
+        session,
+        reserved_spend_usd=first.estimated_cost_usd or 0.0,
+    )
+    assert first.requires_approval is False
+    assert second.requires_approval is True
+    assert second.remaining_cap_usd == 2.0
+@pytest.mark.asyncio
+async def test_manual_approval_does_not_record_spend_when_session_yolo_disabled(monkeypatch):
+    called = False
+    async def fake_estimate(*args, **kwargs):
+        nonlocal called
+        called = True
+        return CostEstimate(estimated_cost_usd=2.0, billable=True)
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    session = _session(enabled=False, cap=5.0, spent=0.0)
+    await agent_loop._record_manual_approved_spend_if_needed(
+        session,
+        "sandbox_create",
+        {"hardware": "a10g-large"},
+    )
+    assert called is False
+    assert session.auto_approval_estimated_spend_usd == 0.0
+@pytest.mark.asyncio
+async def test_manual_approval_records_spend_when_session_yolo_enabled(monkeypatch):
+    async def fake_estimate(*args, **kwargs):
+        return CostEstimate(estimated_cost_usd=1.25, billable=True)
+    monkeypatch.setattr(agent_loop, "estimate_tool_cost", fake_estimate)
+    session = _session(enabled=True, cap=5.0, spent=0.5)
+    await agent_loop._record_manual_approved_spend_if_needed(
+        session,
+        "sandbox_create",
+        {"hardware": "a10g-large"},
+    )
+    assert session.auto_approval_estimated_spend_usd == 1.75

tests/unit/test_cost_estimation.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from types import SimpleNamespace
+import pytest
+from agent.core import cost_estimation
+def test_parse_timeout_hours_common_units():
+    assert cost_estimation.parse_timeout_hours(None) == 0.5
+    assert cost_estimation.parse_timeout_hours("30m") == 0.5
+    assert cost_estimation.parse_timeout_hours("3h") == 3
+    assert cost_estimation.parse_timeout_hours(3600) == 1
+    assert cost_estimation.parse_timeout_hours("not-a-duration") is None
+@pytest.mark.asyncio
+async def test_estimate_hf_job_cost_uses_catalog_price(monkeypatch):
+    async def fake_catalog():
+        return {"a100-large": 4.0}
+    monkeypatch.setattr(cost_estimation, "hf_jobs_price_catalog", fake_catalog)
+    estimate = await cost_estimation.estimate_hf_job_cost(
+        {"hardware_flavor": "a100-large", "timeout": "8h"}
+    )
+    assert estimate.estimated_cost_usd == 32.0
+    assert estimate.billable is True
+@pytest.mark.asyncio
+async def test_estimate_hf_job_cost_blocks_unknown_price(monkeypatch):
+    async def fake_catalog():
+        return {}
+    monkeypatch.setattr(cost_estimation, "hf_jobs_price_catalog", fake_catalog)
+    estimate = await cost_estimation.estimate_hf_job_cost(
+        {"hardware_flavor": "mystery-gpu", "timeout": "30m"}
+    )
+    assert estimate.estimated_cost_usd is None
+    assert estimate.billable is True
+    assert "No price" in estimate.block_reason
+@pytest.mark.asyncio
+async def test_estimate_sandbox_cost_is_zero_for_existing_or_cpu_basic():
+    existing = await cost_estimation.estimate_sandbox_cost(
+        {"hardware": "a100-large"},
+        session=SimpleNamespace(sandbox=object()),
+    )
+    cpu = await cost_estimation.estimate_sandbox_cost({"hardware": "cpu-basic"})
+    assert existing.estimated_cost_usd == 0.0
+    assert existing.billable is False
+    assert cpu.estimated_cost_usd == 0.0
+    assert cpu.billable is False

tests/unit/test_session_manager_persistence.py CHANGED Viewed

@@ -27,6 +27,23 @@ class FakeRuntimeSession:
         self.turn_count = 0
         self.config = SimpleNamespace(model_name=model)
         self.notification_destinations = []
 class RestoreStore(NoopSessionStore):
@@ -85,6 +102,24 @@ def _runtime_agent_session(
     )
 def _install_fake_runtime(manager: SessionManager) -> asyncio.Event:
     stop = asyncio.Event()
     manager.run_calls = 0  # type: ignore[attr-defined]
@@ -204,6 +239,34 @@ async def test_lazy_restore_preserves_pending_approval_tool_calls():
         await _cancel_runtime_tasks(manager)
 @pytest.mark.asyncio
 async def test_list_sessions_dev_uses_store_dev_visibility():
     class ListStore(NoopSessionStore):
@@ -221,6 +284,9 @@ async def test_list_sessions_dev_uses_store_dev_visibility():
                         "user_id": "alice",
                         "model": "m",
                         "created_at": datetime.now(UTC),
                     },
                     {
                         "session_id": "s2",
@@ -238,3 +304,10 @@ async def test_list_sessions_dev_uses_store_dev_visibility():
     assert store.seen_user_id == "dev"
     assert {session["session_id"] for session in sessions} == {"s1", "s2"}

         self.turn_count = 0
         self.config = SimpleNamespace(model_name=model)
         self.notification_destinations = []
+        self.auto_approval_enabled = False
+        self.auto_approval_cost_cap_usd = None
+        self.auto_approval_estimated_spend_usd = 0.0
+    def auto_approval_policy_summary(self):
+        cap = self.auto_approval_cost_cap_usd
+        remaining = None if cap is None else max(0, cap - self.auto_approval_estimated_spend_usd)
+        return {
+            "enabled": self.auto_approval_enabled,
+            "cost_cap_usd": cap,
+            "estimated_spend_usd": self.auto_approval_estimated_spend_usd,
+            "remaining_usd": remaining,
+        }
+    def set_auto_approval_policy(self, *, enabled, cost_cap_usd):
+        self.auto_approval_enabled = enabled
+        self.auto_approval_cost_cap_usd = cost_cap_usd
 class RestoreStore(NoopSessionStore):
     )
+@pytest.mark.asyncio
+async def test_update_session_auto_approval_defaults_to_five_dollars():
+    manager = _manager_with_store(NoopSessionStore())
+    existing = _runtime_agent_session("s1", user_id="owner")
+    manager.sessions["s1"] = existing
+    summary = await manager.update_session_auto_approval(
+        "s1",
+        enabled=True,
+        cost_cap_usd=None,
+        cap_provided=False,
+    )
+    assert summary["enabled"] is True
+    assert summary["cost_cap_usd"] == 5.0
+    assert summary["remaining_usd"] == 5.0
 def _install_fake_runtime(manager: SessionManager) -> asyncio.Event:
     stop = asyncio.Event()
     manager.run_calls = 0  # type: ignore[attr-defined]
         await _cancel_runtime_tasks(manager)
+@pytest.mark.asyncio
+async def test_lazy_restore_preserves_auto_approval_policy():
+    store = RestoreStore(
+        metadata={
+            "session_id": "yolo-session",
+            "user_id": "owner",
+            "model": "test-model",
+            "auto_approval_enabled": True,
+            "auto_approval_cost_cap_usd": 5.0,
+            "auto_approval_estimated_spend_usd": 1.25,
+        }
+    )
+    manager = _manager_with_store(store)
+    stop = _install_fake_runtime(manager)
+    try:
+        restored = await manager.ensure_session_loaded("yolo-session", user_id="owner")
+        assert restored is not None
+        assert restored.session.auto_approval_enabled is True
+        assert restored.session.auto_approval_cost_cap_usd == 5.0
+        assert restored.session.auto_approval_estimated_spend_usd == 1.25
+        assert restored.session.auto_approval_policy_summary()["remaining_usd"] == 3.75
+    finally:
+        stop.set()
+        await _cancel_runtime_tasks(manager)
 @pytest.mark.asyncio
 async def test_list_sessions_dev_uses_store_dev_visibility():
     class ListStore(NoopSessionStore):
                         "user_id": "alice",
                         "model": "m",
                         "created_at": datetime.now(UTC),
+                        "auto_approval_enabled": True,
+                        "auto_approval_cost_cap_usd": 5.0,
+                        "auto_approval_estimated_spend_usd": 2.0,
                     },
                     {
                         "session_id": "s2",
     assert store.seen_user_id == "dev"
     assert {session["session_id"] for session in sessions} == {"s1", "s2"}
+    yolo = next(session for session in sessions if session["session_id"] == "s1")
+    assert yolo["auto_approval"] == {
+        "enabled": True,
+        "cost_cap_usd": 5.0,
+        "estimated_spend_usd": 2.0,
+        "remaining_usd": 3.0,
+    }