Spaces:
Running
Running
File size: 31,927 Bytes
1635e66 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 | """
Sandbox tools — expose the Sandbox client as agent tools.
5 tools total:
sandbox_create — create/replace sandbox for non-default hardware
bash, read, write, edit — operations on the active sandbox
A cpu-basic sandbox is preloaded for each session. Operation tools wait for it
if startup is still in progress.
"""
from __future__ import annotations
import asyncio
import logging
import re
import threading
import uuid
import weakref
from collections.abc import Callable
from datetime import datetime, timezone
from typing import Any
from huggingface_hub import HfApi, SpaceHardware
from agent.core.cost_estimation import (
DEFAULT_SANDBOX_RESERVATION_HOURS,
SPACE_PRICE_USD_PER_HOUR,
CostEstimate,
)
from agent.core.hub_artifacts import wrap_shell_command_with_hub_artifact_bootstrap
from agent.core.session import Event
from agent.tools.sandbox_client import Sandbox
from agent.tools.trackio_seed import ensure_trackio_dashboard
logger = logging.getLogger(__name__)
# Hardware tier used for the sandbox that is preloaded for each session.
DEFAULT_CPU_SANDBOX_HARDWARE = "cpu-basic"
# Match the exact suffix pattern Sandbox.create produces: "sandbox-<8 hex>".
# Used to identify orphan sandboxes from prior sessions safely (won't match
# user-renamed lookalikes).
SANDBOX_SPACE_NAME_RE = re.compile(r"^sandbox-[a-f0-9]{8}$")
# HF Space duplication/build APIs can behave poorly when multiple private
# sandboxes are created concurrently for the same namespace. Keep session
# creation non-blocking, but serialize the actual Hub create path per owner.
# Keyed first by event loop (weakly referenced), then by owner name.
_SANDBOX_CREATE_LOCKS: weakref.WeakKeyDictionary[
    asyncio.AbstractEventLoop, dict[str, asyncio.Lock]
] = weakref.WeakKeyDictionary()
# Fraction of the reservation window to sleep before the YOLO renewal loop
# reconciles the current reservation and tries to reserve the next one.
_SANDBOX_YOLO_RENEWAL_FRACTION = 0.95
def _get_sandbox_create_lock(owner: str) -> asyncio.Lock:
    """Return the creation lock for *owner* on the running event loop.

    Locks are stored per event loop and created lazily on first request, so
    repeated calls for the same owner on the same loop yield the same lock.
    Must be called while an event loop is running.
    """
    per_loop_locks = _SANDBOX_CREATE_LOCKS.setdefault(asyncio.get_running_loop(), {})
    existing = per_loop_locks.get(owner)
    if existing is not None:
        return existing
    created = asyncio.Lock()
    per_loop_locks[owner] = created
    return created
def _sandbox_window_cost_usd(hardware: str) -> float | None:
    """Return the USD cost of one reservation window on *hardware*.

    The hourly price is looked up by the string form of *hardware* and
    multiplied by the default reservation length, rounded to 4 decimals.
    Returns None when no price is listed for that hardware.
    """
    hourly_rate = SPACE_PRICE_USD_PER_HOUR.get(str(hardware))
    if hourly_rate is not None:
        return round(float(hourly_rate) * DEFAULT_SANDBOX_RESERVATION_HOURS, 4)
    return None
def _sandbox_window_estimate(hardware: str) -> CostEstimate:
    """Build a CostEstimate covering one reservation window on *hardware*.

    When a price is known the estimate carries that cost and is billable only
    if the cost is positive. When no price is known the estimate is marked
    billable with no cost and a block reason naming the hardware.
    """
    window_cost = _sandbox_window_cost_usd(hardware)
    if window_cost is not None:
        return CostEstimate(
            estimated_cost_usd=window_cost,
            billable=window_cost > 0,
            label=hardware,
        )
    return CostEstimate(
        estimated_cost_usd=None,
        billable=True,
        block_reason=f"No price is available for sandbox hardware '{hardware}'.",
        label=hardware,
    )
def _sandbox_yolo_renewal_delay_s() -> float:
    """Return how many seconds the renewal loop sleeps between renewals.

    This is the reservation window in seconds scaled by the renewal fraction,
    with a floor of one second.
    """
    window_seconds = DEFAULT_SANDBOX_RESERVATION_HOURS * 3600
    return max(1.0, window_seconds * _SANDBOX_YOLO_RENEWAL_FRACTION)
def _sandbox_yolo_finalized_cost_usd(session: Any) -> float:
    """Return the sandbox cost already finalized on *session*, never negative.

    Reads the ``_sandbox_yolo_finalized_cost_usd`` attribute, treating a
    missing attribute as 0.0.
    """
    recorded = float(getattr(session, "_sandbox_yolo_finalized_cost_usd", 0.0))
    return max(0.0, recorded)
def _add_sandbox_yolo_finalized_cost(session: Any, amount_usd: float | None) -> None:
    """Add *amount_usd* to the finalized sandbox cost stored on *session*.

    Missing or non-positive amounts are ignored and leave the session
    untouched. The running total is rounded to 4 decimals.
    """
    if amount_usd is not None and amount_usd > 0:
        running_total = _sandbox_yolo_finalized_cost_usd(session) + float(amount_usd)
        session._sandbox_yolo_finalized_cost_usd = round(running_total, 4)
def _cancel_sandbox_yolo_renewal(session: Any) -> None:
    """Cancel the session's renewal task, if any, and clear the reference.

    A task is cancelled only when it is unfinished and is not the task that
    is currently running (so the renewal loop never cancels itself). The
    ``_sandbox_yolo_renewal_task`` attribute is always reset to None.
    """
    pending = getattr(session, "_sandbox_yolo_renewal_task", None)
    still_running = bool(pending) and not pending.done()
    if still_running and pending is not asyncio.current_task():
        pending.cancel()
    session._sandbox_yolo_renewal_task = None
def _start_sandbox_yolo_renewal(
    session: Any,
    *,
    hardware: str,
    reservation_id: str,
) -> None:
    """Launch the background renewal loop for a non-default sandbox.

    Does nothing for the default CPU hardware. Otherwise any previous renewal
    task is cancelled, a new one is started and stored on the session, and a
    done-callback logs a warning if the task ends with an exception.
    """
    if hardware == DEFAULT_CPU_SANDBOX_HARDWARE:
        return

    def _report_failure(finished: asyncio.Task) -> None:
        # Cancellation is an expected way for the loop to end; stay quiet.
        if finished.cancelled():
            return
        try:
            finished.result()
        except Exception as exc:
            logger.warning("Sandbox YOLO renewal task failed: %s", exc)

    _cancel_sandbox_yolo_renewal(session)
    renewal = asyncio.create_task(
        _sandbox_yolo_renewal_loop(
            session,
            hardware=hardware,
            reservation_id=reservation_id,
        )
    )
    session._sandbox_yolo_renewal_task = renewal
    renewal.add_done_callback(_report_failure)
async def _sandbox_yolo_renewal_loop(
    session: Any,
    *,
    hardware: str,
    reservation_id: str,
) -> None:
    """Periodically finalize and re-reserve budget for the active sandbox.

    Each iteration sleeps for the renewal delay, then:

    1. Exits if the session's reservation id no longer matches the one this
       loop is tracking, the session has no sandbox, or the sandbox hardware
       differs from *hardware*.
    2. Reconciles the tracked reservation against one window's cost and adds
       that cost to the session's finalized total.
    3. Exits (clearing the reservation id) if YOLO mode is no longer enabled.
    4. Tries to reserve budget for the next window. If the reservation is
       refused, clears the reservation id, emits a ``tool_log`` event and
       tears the sandbox down. Otherwise it tracks the new reservation id
       and loops again.

    Args:
        session: Session object holding the sandbox and budget state.
        hardware: Hardware tier this loop was started for.
        reservation_id: Id of the reservation covering the first window.
    """
    from agent.core.yolo_budget import (
        reconcile_budget_reservation,
        reserve_session_budget,
        session_yolo_enabled,
    )

    active_reservation_id = reservation_id
    while True:
        await asyncio.sleep(_sandbox_yolo_renewal_delay_s())
        # Someone else replaced or cleared the reservation: this loop is stale.
        if (
            getattr(session, "_sandbox_yolo_reservation_id", None)
            != active_reservation_id
        ):
            return
        if not getattr(session, "sandbox", None):
            return
        if getattr(session, "sandbox_hardware", None) != hardware:
            return
        # Close out the window that just elapsed before reserving another.
        # NOTE(review): window_cost may be None when no price is listed —
        # confirm reconcile_budget_reservation accepts that.
        window_cost = _sandbox_window_cost_usd(hardware)
        reconcile_budget_reservation(session, active_reservation_id, window_cost)
        _add_sandbox_yolo_finalized_cost(session, window_cost)
        if not session_yolo_enabled(session):
            session._sandbox_yolo_reservation_id = None
            return
        estimate = _sandbox_window_estimate(hardware)
        next_reservation_id = f"sandbox-renew-{uuid.uuid4().hex[:10]}"
        decision = reserve_session_budget(
            session,
            estimate,
            spend_kind="sandbox",
            reservation_id=next_reservation_id,
        )
        if not decision.allowed:
            session._sandbox_yolo_reservation_id = None
            await session.send_event(
                Event(
                    event_type="tool_log",
                    data={
                        "tool": "sandbox",
                        "log": (
                            "YOLO usage cap reached for the active sandbox; "
                            "tearing it down before the reserved budget expires."
                        ),
                    },
                )
            )
            await teardown_session_sandbox(session)
            return
        # Prefer the id reported on the decision; fall back to the one we proposed.
        active_reservation_id = (
            decision.reservation.reservation_id
            if decision.reservation
            else next_reservation_id
        )
        session._sandbox_yolo_reservation_id = active_reservation_id
def _session_tool_logger(
    session: Any, *, tool: str = "sandbox"
) -> Callable[[str], object] | None:
    """Return a callback that posts ``tool_log`` events to the session queue.

    The callback schedules the enqueue on the event loop that was running
    when this function was called, via ``call_soon_threadsafe``. Returns None
    when the session has no ``event_queue``.
    """
    queue = getattr(session, "event_queue", None)
    if queue is None:
        return None
    owning_loop = asyncio.get_running_loop()

    def _emit(message: str) -> None:
        event = Event(event_type="tool_log", data={"tool": tool, "log": message})
        owning_loop.call_soon_threadsafe(queue.put_nowait, event)

    return _emit
def _looks_like_path(script: str) -> bool:
    """Tell whether *script* reads as a file path rather than inline code.

    A candidate must be a string with no surrounding whitespace and no
    CR, LF or NUL characters, and must not be an http(s) URL. It then counts
    as a path if it starts with ``/``, ``./`` or ``../``, or if it ends in
    ``.py`` and contains no whitespace at all.
    """
    if not isinstance(script, str):
        return False
    if script != script.strip():
        return False
    if any(ch in script for ch in "\r\n\0"):
        return False
    if script.startswith(("http://", "https://")):
        return False
    if script.startswith(("/", "./", "../")):
        return True
    return script.endswith(".py") and not any(ch.isspace() for ch in script)
async def resolve_sandbox_script(
    sandbox: Any, script: str
) -> tuple[str | None, str | None]:
    """Fetch *script* from the sandbox when it looks like a file path.

    Returns:
        ``(content, error)``. ``content`` holds the file text on success and
        ``error`` holds a message on failure. ``(None, None)`` means there is
        no sandbox or *script* is not a path, so the caller should use the
        string as-is.
    """
    if not sandbox or not _looks_like_path(script):
        return None, None

    def _strip_line_number(numbered: str) -> str:
        # read returns "N\tcontent"; keep only what follows the first tab.
        _, tab, text = numbered.partition("\t")
        return text if tab else numbered

    try:
        # Use the read endpoint instead of bash("cat ...") which truncates at 25KB.
        outcome = await asyncio.to_thread(sandbox.read, script, limit=100_000)
        if not (outcome.success and outcome.output):
            return None, f"Failed to read {script} from sandbox: {outcome.error}"
        body = "\n".join(
            _strip_line_number(row) for row in outcome.output.split("\n")
        )
        return body, None
    except Exception as e:
        return None, f"Failed to read {script} from sandbox: {e}"
async def _seed_trackio_dashboard_safe(session: Any, space_id: str) -> None:
    """Idempotently seed *space_id* with trackio dashboard files.

    Uses the session's HF token and reports progress as ``tool_log`` events
    tagged ``sandbox_create``. Errors are reported through the same log
    callback and swallowed: a failed seed should not block sandbox creation.
    Does nothing when there is no session or the session has no HF token.

    Args:
        session: Session providing ``hf_token`` and (optionally) an event queue.
        space_id: Id of the Space that should host the trackio dashboard.
    """
    if not session or not getattr(session, "hf_token", None):
        return
    # Reuse the shared thread-safe logger. It tolerates sessions without an
    # event queue, so the error path below cannot itself raise on a missing
    # queue and break the best-effort contract.
    _log = _session_tool_logger(session, tool="sandbox_create") or (
        lambda msg: None
    )
    try:
        await asyncio.to_thread(
            ensure_trackio_dashboard, space_id, session.hf_token, _log
        )
    except Exception as e:
        _log(f"trackio dashboard seed failed: {e}")
async def _update_persisted_sandbox_fields(session: Any, **fields: Any) -> None:
    """Write sandbox metadata to the durable session record, best effort.

    Silently skipped when the session has no persistence store, no session
    id, or a store without ``update_session_fields``. Store errors are logged
    as warnings and never propagated.
    """
    store = getattr(session, "persistence_store", None)
    session_id = getattr(session, "session_id", None)
    if not store or not session_id:
        return
    if not hasattr(store, "update_session_fields"):
        return
    try:
        await store.update_session_fields(session_id, **fields)
    except Exception as exc:
        logger.warning(
            "Failed to persist sandbox metadata for %s: %s", session_id, exc
        )
async def _persist_active_sandbox(
    session: Any,
    sandbox: Sandbox,
    *,
    hardware: str,
) -> None:
    """Record the active sandbox on the durable session record.

    Stores the space id, hardware, owner (the part of the space id before the
    first ``/``, or None when there is no slash), the current UTC time and an
    ``"active"`` status. Does nothing when the sandbox has no space id.
    """
    space_id = getattr(sandbox, "space_id", None)
    if not space_id:
        return
    namespace, slash, _ = space_id.partition("/")
    await _update_persisted_sandbox_fields(
        session,
        sandbox_space_id=space_id,
        sandbox_hardware=hardware,
        sandbox_owner=namespace if slash else None,
        sandbox_created_at=datetime.now(timezone.utc),
        sandbox_status="active",
    )
async def _clear_persisted_sandbox(session: Any) -> None:
    """Blank the persisted sandbox metadata and mark the sandbox destroyed."""
    blanked = dict.fromkeys(
        (
            "sandbox_space_id",
            "sandbox_hardware",
            "sandbox_owner",
            "sandbox_created_at",
        )
    )
    await _update_persisted_sandbox_fields(
        session, **blanked, sandbox_status="destroyed"
    )
# ── Tool name mapping (short agent names → Sandbox client names) ──────
async def _ensure_sandbox(
    session: Any,
    hardware: str = DEFAULT_CPU_SANDBOX_HARDWARE,
    extra_secrets: dict[str, str] | None = None,
    cancel_event: threading.Event | None = None,
    **create_kwargs,
) -> tuple[Sandbox | None, str | None]:
    """Ensure a sandbox exists on the session, creating one if needed.

    Creation is serialized per HF owner through a per-loop lock. After the
    lock is acquired the session is re-checked so that a sandbox created
    while waiting is reused instead of duplicated.

    Args:
        session: Session that owns the sandbox; must expose ``hf_token``.
        hardware: Hardware tier to create when no sandbox exists yet.
        extra_secrets: Additional secrets to inject into a new sandbox.
        cancel_event: Optional threading event used to abort creation.
        **create_kwargs: Extra keyword arguments forwarded to sandbox creation.

    Returns:
        ``(sandbox, error_message)`` — one of the two is None.

    Raises:
        Exception: Whatever the ``whoami`` lookup or sandbox creation raises;
            callers in this module catch and report these.
    """
    if session and getattr(session, "sandbox", None):
        return session.sandbox, None
    if not session:
        return None, "No session available."
    token = session.hf_token
    if not token:
        return None, "No HF token available. Cannot create sandbox."
    api = HfApi(token=token)
    # whoami() performs a blocking HTTP request; run it in a worker thread so
    # the event loop (and every other session on it) is not stalled.
    user_info = await asyncio.to_thread(api.whoami)
    owner = user_info.get("name", user_info.get("user", ""))
    if not owner:
        return None, "Could not determine HF username from token."
    create_lock = _get_sandbox_create_lock(owner)
    if create_lock.locked():
        # Let the user know why nothing is happening while we queue.
        await session.send_event(
            Event(
                event_type="tool_log",
                data={
                    "tool": "sandbox",
                    "log": "Waiting for sandbox creation slot...",
                },
            )
        )
    async with create_lock:
        # Re-check: a sandbox may have been attached while we awaited above.
        if getattr(session, "sandbox", None):
            return session.sandbox, None
        return await _create_sandbox_locked(
            session,
            api=api,
            owner=owner,
            hardware=hardware,
            extra_secrets=extra_secrets,
            cancel_event=cancel_event,
            **create_kwargs,
        )
async def _create_sandbox_locked(
    session: Any,
    *,
    api: HfApi,
    owner: str,
    hardware: str,
    extra_secrets: dict[str, str] | None = None,
    cancel_event: threading.Event | None = None,
    **create_kwargs,
) -> tuple[Sandbox | None, str | None]:
    """Create the Space while the per-owner sandbox creation lock is held.

    Emits progress ``tool_log`` events, runs the blocking ``Sandbox.create``
    in a worker thread, and on success attaches the sandbox to the session,
    persists its metadata and records creation telemetry.

    Args:
        session: Session that will own the sandbox.
        api: Hub client built from the session token (not used in this body).
        owner: HF namespace the sandbox is created under.
        hardware: Hardware tier to request.
        extra_secrets: Extra secrets to inject; falsy values are dropped.
        cancel_event: Optional event that aborts creation when set; a fresh
            one is created when omitted.
        **create_kwargs: Extra keyword arguments for ``Sandbox.create``;
            ``private`` is always forced to True.

    Returns:
        ``(sandbox, None)`` on success, or ``(None, message)`` when creation
        was cancelled.
    """
    token = session.hf_token
    await session.send_event(
        Event(
            event_type="tool_log",
            data={
                "tool": "sandbox",
                "log": f"Auto-creating sandbox for {owner} ({hardware})...",
            },
        )
    )
    # Thread-safe log callback: posts tool_log events from worker threads.
    _log = _session_tool_logger(session) or (lambda msg: None)
    # Bridge asyncio cancel event to a threading.Event for the blocking create call.
    # We poll session._cancelled from the main loop in a background task and set
    # a threading.Event that Sandbox.create checks during its polling loops.
    cancel_flag = cancel_event or threading.Event()

    async def _watch_cancel():
        await session._cancelled.wait()
        cancel_flag.set()

    watcher_task = asyncio.create_task(_watch_cancel())
    secrets: dict[str, str] = {"HF_TOKEN": token}
    if extra_secrets:
        secrets.update({k: v for k, v in extra_secrets.items() if v})
    create_kwargs["private"] = True  # enforce: overrides any caller-supplied value
    # create_kwargs is spread last, so caller-supplied keys win over the
    # defaults assembled here.
    kwargs = {
        "owner": owner,
        "hardware": hardware,
        "token": token,
        "secrets": secrets,
        "log": _log,
        "cancel_event": cancel_flag,
        **create_kwargs,
    }
    if hardware != DEFAULT_CPU_SANDBOX_HARDWARE:
        # NOTE(review): presumably seconds (45 min) of idle time before the
        # Space sleeps — confirm against Sandbox.create.
        kwargs["sleep_time"] = 2700
    import time as _t

    _t_start = _t.monotonic()
    try:
        sb = await asyncio.to_thread(Sandbox.create, **kwargs)
    except Sandbox.Cancelled:
        return None, "Sandbox creation cancelled by user."
    finally:
        # The watcher is only needed while create is in flight.
        watcher_task.cancel()
    # Cancellation may have arrived after create returned: discard the
    # freshly created Space (if we own it) rather than attaching it.
    if cancel_flag.is_set():
        if getattr(sb, "_owns_space", False):
            try:
                await asyncio.to_thread(sb.delete, log=_log)
            except Exception as e:
                logger.warning(
                    "Failed to delete cancelled sandbox %s: %s", sb.space_id, e
                )
        return None, "Sandbox creation cancelled by user."
    session.sandbox = sb
    session.sandbox_hardware = hardware
    session.sandbox_preload_error = None
    await _persist_active_sandbox(session, sb, hardware=hardware)
    # Telemetry: sandbox creation (infra consumption signal)
    from agent.core import telemetry

    await telemetry.record_sandbox_create(
        session,
        sb,
        hardware=hardware,
        create_latency_s=int(_t.monotonic() - _t_start),
    )
    await session.send_event(
        Event(
            event_type="tool_log",
            data={"tool": "sandbox", "log": f"Sandbox ready: {sb.space_id} ({sb.url})"},
        )
    )
    return sb, None
def start_cpu_sandbox_preload(session: Any) -> asyncio.Task | None:
    """Kick off background creation of the session's ``cpu-basic`` sandbox.

    Returns None when there is no session or a sandbox is already attached.
    If a preload task is still running it is returned unchanged; otherwise a
    new task is created, stored on the session together with its cancel
    event, and returned. Failures are recorded in
    ``session.sandbox_preload_error`` rather than raised from the task.
    """
    if not session or getattr(session, "sandbox", None):
        return None
    in_flight = getattr(session, "sandbox_preload_task", None)
    if in_flight and not in_flight.done():
        return in_flight

    stop_signal = threading.Event()
    session.sandbox_preload_cancel_event = stop_signal
    session.sandbox_preload_error = None

    async def _preload() -> Sandbox | None:
        try:
            created, failure = await _ensure_sandbox(
                session,
                hardware=DEFAULT_CPU_SANDBOX_HARDWARE,
                cancel_event=stop_signal,
            )
            if not failure:
                return created
            session.sandbox_preload_error = failure
            return None
        except asyncio.CancelledError:
            # Tell the worker thread to stop too, then let cancellation propagate.
            stop_signal.set()
            session.sandbox_preload_error = "Sandbox creation cancelled by user."
            raise
        except Exception as exc:
            session.sandbox_preload_error = f"Failed to create sandbox: {exc}"
            logger.warning("CPU sandbox preload failed: %s", exc)
            return None

    preload_task = asyncio.create_task(_preload())
    session.sandbox_preload_task = preload_task
    return preload_task
async def cancel_sandbox_preload(session: Any) -> None:
    """Best-effort cancellation for an in-flight CPU sandbox preload.

    Sets the preload's cancel event (if any) and then waits up to 30 seconds
    for the preload task to finish on its own. The wait is shielded so that
    the timeout does not itself cancel the task. Only after the timeout is
    the task cancelled explicitly. Returns immediately when there is no
    unfinished task or when called from within the preload task itself.

    Raises:
        asyncio.CancelledError: If the caller is cancelled while waiting.
    """
    cancel_event = getattr(session, "sandbox_preload_cancel_event", None)
    if cancel_event is not None:
        cancel_event.set()
    task = getattr(session, "sandbox_preload_task", None)
    if not task or task.done():
        return
    # Awaiting ourselves would deadlock.
    if task is asyncio.current_task():
        return
    try:
        await asyncio.wait_for(asyncio.shield(task), timeout=30)
    except asyncio.TimeoutError:
        logger.warning(
            "Timed out waiting for CPU sandbox preload cancellation; "
            "task is still live, cancelling asyncio wrapper"
        )
        task.cancel()
    except asyncio.CancelledError:
        raise
    except Exception as e:
        # Still best-effort, but leave a trace instead of swallowing silently.
        logger.debug("CPU sandbox preload raised while being cancelled: %s", e)
async def get_active_or_preloaded_sandbox(
    session: Any,
) -> tuple[Sandbox | None, str | None]:
    """Return the session's sandbox, awaiting the startup preload if needed.

    Returns:
        ``(sandbox, None)`` when a sandbox is attached, possibly after
        waiting on the preload task. Otherwise ``(None, message)``, where the
        message is the recorded preload error or a "still starting" notice.

    Raises:
        asyncio.CancelledError: If cancelled while waiting for the preload.
    """
    if not session:
        return None, "No session available."

    ready = getattr(session, "sandbox", None)
    if ready:
        return ready, None

    preload = getattr(session, "sandbox_preload_task", None)
    if preload:
        try:
            # Shield so that cancelling this waiter does not kill the preload.
            await asyncio.shield(preload)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            session.sandbox_preload_error = f"Failed to create sandbox: {exc}"
        ready = getattr(session, "sandbox", None)
        if ready:
            return ready, None

    failure = getattr(session, "sandbox_preload_error", None)
    return None, failure or "Sandbox is still starting. Please retry shortly."
async def teardown_session_sandbox(session: Any) -> None:
    """Cancel sandbox preload and delete the active owned sandbox, if present.

    Steps:

    1. Cancel any in-flight preload and the YOLO renewal task.
    2. Detach the sandbox from the session.
    3. If the sandbox owns its Space, delete it with up to 3 attempts and
       exponential backoff (1s, 2s). After a successful attempt, record
       destroy telemetry and reconcile the session's budget reservation
       against the cost not yet finalized by the renewal loop.
    4. Always clear ``session.sandbox_hardware`` and the persisted sandbox
       metadata once a sandbox was attached, whatever the outcome.

    Deletion failures are logged, not raised.
    """
    if not session:
        return
    await cancel_sandbox_preload(session)
    _cancel_sandbox_yolo_renewal(session)
    sandbox = getattr(session, "sandbox", None)
    # Detach first so concurrent tool calls stop using a sandbox being deleted.
    session.sandbox = None
    if not sandbox:
        session.sandbox_hardware = None
        return
    try:
        # Sandboxes attached to a Space we do not own are left alone.
        if not getattr(sandbox, "_owns_space", False):
            return
        space_id = getattr(sandbox, "space_id", None)
        delete_log = _session_tool_logger(session)
        last_err: Exception | None = None
        # NOTE(review): the retry also wraps the telemetry/budget steps, so a
        # failure there after a successful delete re-runs sandbox.delete —
        # confirm delete is safe to repeat.
        for attempt in range(3):
            try:
                logger.info(
                    "Deleting sandbox %s (attempt %s/3)...",
                    space_id,
                    attempt + 1,
                )
                await asyncio.to_thread(sandbox.delete, log=delete_log)
                from agent.core import telemetry

                usage = await telemetry.record_sandbox_destroy(session, sandbox)
                from agent.core.yolo_budget import (
                    adjust_session_spend,
                    reconcile_budget_reservation,
                )

                actual_total = (
                    usage.get("estimated_cost_usd") if isinstance(usage, dict) else None
                )
                finalized = _sandbox_yolo_finalized_cost_usd(session)
                active_reservation_id = getattr(
                    session, "_sandbox_yolo_reservation_id", None
                )
                # Only the part of the bill that earlier renewal windows have
                # not already finalized still needs reconciling.
                actual_unfinalized = None
                if actual_total is not None:
                    actual_unfinalized = max(0.0, float(actual_total) - finalized)
                reconcile_budget_reservation(
                    session,
                    active_reservation_id,
                    actual_unfinalized,
                    allow_zero_actual=True,
                )
                # No open reservation to settle against: book the spend directly.
                if active_reservation_id is None and actual_unfinalized:
                    adjust_session_spend(session, actual_unfinalized)
                session._sandbox_yolo_reservation_id = None
                session._sandbox_yolo_finalized_cost_usd = 0.0
                return
            except Exception as e:
                last_err = e
                if attempt < 2:
                    await asyncio.sleep(2**attempt)
        logger.error(
            "Failed to delete sandbox %s after 3 attempts: %s. "
            "Orphan — sweep script will pick it up.",
            space_id,
            last_err,
        )
    finally:
        session.sandbox_hardware = None
        await _clear_persisted_sandbox(session)
# ── sandbox_create tool ──────────────────────────────────────────────
# Tool specification (name, model-facing description, JSON-schema parameters)
# for sandbox_create. The hardware list and enum are derived from SpaceHardware.
SANDBOX_CREATE_TOOL_SPEC = {
    "name": "sandbox_create",
    "description": (
        "Create or replace the session sandbox when non-default hardware is needed.\n\n"
        "A private cpu-basic sandbox is already started automatically for each session. "
        "For normal CPU code execution, call bash/read/write/edit directly; do NOT call sandbox_create first.\n\n"
        "Use sandbox_create when: you need GPU hardware, cpu-upgrade, or Trackio secrets before running code. "
        "The active sandbox persists across tool calls within the session. pip install works out of the box. "
        "Sandboxes are always created as private HF Spaces.\n\n"
        "For ML code that uses CUDA, bf16, or model loading: use GPU hardware (t4-small minimum). "
        "CPU sandboxes cannot run GPU code paths — your test will not catch GPU-related errors.\n\n"
        "Before choosing hardware, estimate your VRAM needs (models you run, training data size). Rule of thumb: bf16/fp16 ≈ 2 bytes/param, "
        "fp32 ≈ 4 bytes/param, plus ~20% overhead for optimizer states during training.\n"
        "Common picks: t4-small (16GB VRAM, fits ≤1-3B), a10g-small (24GB, ≤7B), a100-large (80GB, ≤30B). "
        "If the model won't fit, pick larger hardware upfront — OOM on a sandbox wastes time.\n\n"
        "If you intend to run a training script in this sandbox that uses report_to='trackio', "
        "pass `trackio_space_id` (e.g. '<username>/ml-intern-<8char>') and `trackio_project` so they "
        "are set as TRACKIO_SPACE_ID/TRACKIO_PROJECT secrets in the sandbox and the UI can embed the live dashboard.\n\n"
        "Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n"
    ),
    "parameters": {
        "type": "object",
        "required": [],
        "additionalProperties": False,
        "properties": {
            "hardware": {
                "type": "string",
                "enum": [e.value for e in SpaceHardware],
                "description": (
                    "Hardware tier for the sandbox. Omit for the existing auto-started "
                    "cpu-basic sandbox; choose GPU/cpu-upgrade only when needed."
                ),
            },
            "trackio_space_id": {
                "type": "string",
                "description": (
                    "Optional. The HF Space hosting the trackio dashboard for runs in this sandbox "
                    "(e.g. '<username>/ml-intern-<8char>', under YOUR HF namespace). Injected as "
                    "TRACKIO_SPACE_ID secret and surfaced to the UI. The Space is auto-created and "
                    "seeded with the trackio dashboard — DO NOT pre-create it via hf_repo_git, "
                    "that produces an empty Space that breaks the embed."
                ),
            },
            "trackio_project": {
                "type": "string",
                "description": (
                    "Optional. The trackio project name. Injected as TRACKIO_PROJECT secret and "
                    "used by the UI to filter the embedded dashboard to this project."
                ),
            },
        },
    },
}
async def sandbox_create_handler(
    args: dict[str, Any], session: Any = None, tool_call_id: str | None = None
) -> tuple[str, bool]:
    """Handle sandbox_create tool calls.

    Behaviour, in order:

    * If the default CPU sandbox is requested while the preload is still
      running, wait for the preload and report that sandbox.
    * If non-default hardware is requested while the preload is still
      running, cancel the preload first.
    * If a sandbox is already attached: report it when the hardware matches;
      tear it down and continue when it is the auto-started CPU sandbox and
      other hardware was requested; otherwise report it unchanged (with a
      note that hardware cannot be switched when a tier was requested).
    * Otherwise create a new sandbox, injecting trackio secrets when given,
      and start the YOLO renewal loop for non-default hardware.

    Args:
        args: Tool arguments (``hardware``, ``trackio_space_id``,
            ``trackio_project``); all optional.
        session: Session owning the sandbox.
        tool_call_id: Id of the tool call; also used as the initial YOLO
            reservation id for non-default hardware.

    Returns:
        ``(message, success)``.
    """
    # A present-but-null/empty "hardware" means "not specified": fall back to
    # the default instead of forwarding None/"" as a hardware tier.
    hardware = args.get("hardware") or DEFAULT_CPU_SANDBOX_HARDWARE
    trackio_space_id = args.get("trackio_space_id") or None
    trackio_project = args.get("trackio_project") or None

    async def _emit_trackio_state(sb: Sandbox) -> None:
        """Tell the frontend which trackio dashboard to embed for this sandbox."""
        if not (session and tool_call_id and trackio_space_id):
            return
        data: dict[str, Any] = {
            "tool_call_id": tool_call_id,
            "tool": "sandbox_create",
            "state": "running",
            "trackioSpaceId": trackio_space_id,
        }
        if trackio_project:
            data["trackioProject"] = trackio_project
        await session.send_event(Event(event_type="tool_state_change", data=data))

    preload_task = getattr(session, "sandbox_preload_task", None)
    preload_in_flight = bool(
        session
        and not getattr(session, "sandbox", None)
        and preload_task
        and not preload_task.done()
    )

    # Default hardware requested while the CPU preload is running: reuse it.
    if preload_in_flight and hardware == DEFAULT_CPU_SANDBOX_HARDWARE:
        sb, error = await get_active_or_preloaded_sandbox(session)
        if error:
            return error, False
        if sb:
            await _emit_trackio_state(sb)
            return (
                f"Sandbox already active: {sb.space_id}\n"
                f"URL: {sb.url}\n"
                f"Hardware: {DEFAULT_CPU_SANDBOX_HARDWARE}\n"
                f"Use bash/read/write/edit to interact with it."
            ), True

    # Different hardware requested: the CPU preload is no longer wanted.
    if preload_in_flight and hardware != DEFAULT_CPU_SANDBOX_HARDWARE:
        await cancel_sandbox_preload(session)

    # If sandbox already exists, return its info or replace the auto CPU sandbox
    if session and getattr(session, "sandbox", None):
        sb = session.sandbox
        active_hardware = getattr(session, "sandbox_hardware", None)
        if active_hardware == hardware:
            await _emit_trackio_state(sb)
            return (
                f"Sandbox already active: {sb.space_id}\n"
                f"URL: {sb.url}\n"
                f"Hardware: {active_hardware}\n"
                f"Use bash/read/write/edit to interact with it."
            ), True
        requested_hardware = args.get("hardware")
        lockout_note = ""
        if (
            active_hardware == DEFAULT_CPU_SANDBOX_HARDWARE
            and hardware != DEFAULT_CPU_SANDBOX_HARDWARE
        ):
            # Upgrade path: drop the auto CPU sandbox, then fall through to create.
            await teardown_session_sandbox(session)
        elif requested_hardware:
            lockout_note = (
                f"\nRequested hardware: {requested_hardware}\n"
                "Hardware cannot be changed by calling sandbox_create again. "
                "Delete the existing sandbox first if you need a different tier."
            )
            await _emit_trackio_state(sb)
            return (
                f"Sandbox already active: {sb.space_id}\n"
                f"URL: {sb.url}\n"
                f"{lockout_note}\n"
                f"Use bash/read/write/edit to interact with it."
            ), True
        else:
            await _emit_trackio_state(sb)
            return (
                f"Sandbox already active: {sb.space_id}\n"
                f"URL: {sb.url}\n"
                f"Hardware: {active_hardware or 'unknown'}\n"
                f"Use bash/read/write/edit to interact with it."
            ), True

    create_kwargs: dict[str, Any] = {}
    extra_secrets: dict[str, str] = {}
    if trackio_space_id:
        extra_secrets["TRACKIO_SPACE_ID"] = trackio_space_id
        await _seed_trackio_dashboard_safe(session, trackio_space_id)
    if trackio_project:
        extra_secrets["TRACKIO_PROJECT"] = trackio_project
    try:
        sb, error = await _ensure_sandbox(
            session,
            hardware=hardware,
            extra_secrets=extra_secrets or None,
            **create_kwargs,
        )
    except Exception as e:
        return f"Failed to create sandbox: {e}", False
    if error:
        return error, False
    if session and tool_call_id and hardware != DEFAULT_CPU_SANDBOX_HARDWARE:
        # The tool call id doubles as the reservation id for the first window.
        session._sandbox_yolo_reservation_id = tool_call_id
        session._sandbox_yolo_finalized_cost_usd = 0.0
        _start_sandbox_yolo_renewal(
            session,
            hardware=hardware,
            reservation_id=tool_call_id,
        )
    await _emit_trackio_state(sb)
    return (
        f"Sandbox created: {sb.space_id}\n"
        f"URL: {sb.url}\n"
        f"Hardware: {hardware}\n"
        "Visibility: private\n"
        f"Use bash/read/write/edit to interact with it."
    ), True
def _make_tool_handler(sandbox_tool_name: str):
    """Build the async handler for one sandbox operation tool.

    The handler resolves the session's sandbox (waiting for the preload if
    necessary), wraps ``bash`` commands with the hub-artifact bootstrap, runs
    the tool in a worker thread and returns ``(text, success)``.
    """
    wraps_command = sandbox_tool_name == "bash"

    async def handler(args: dict[str, Any], session: Any = None) -> tuple[str, bool]:
        sb, error = await get_active_or_preloaded_sandbox(session)
        if error:
            return error, False
        if not sb:
            return "Sandbox is still starting. Please retry shortly.", False
        try:
            call_args = args
            if wraps_command and args.get("command"):
                bootstrapped = wrap_shell_command_with_hub_artifact_bootstrap(
                    args["command"],
                    session,
                )
                call_args = {**args, "command": bootstrapped}
            result = await asyncio.to_thread(
                sb.call_tool, sandbox_tool_name, call_args
            )
            if result.success:
                return result.output or "(no output)", True
            error_msg = result.error or "Unknown error"
            partial_output = result.output
            if partial_output:
                return f"{partial_output}\n\nERROR: {error_msg}", False
            return f"ERROR: {error_msg}", False
        except Exception as e:
            return f"Sandbox operation failed: {e}", False

    return handler
def get_sandbox_tools():
    """Return all 5 sandbox ToolSpecs (sandbox_create + 4 operation tools).

    ``sandbox_create`` comes first, followed by one ToolSpec per entry in
    ``Sandbox.TOOLS``, each with a shared preamble prepended to its own
    description.
    """
    from agent.core.tools import ToolSpec

    # sandbox_create (for GPU or other non-default hardware)
    create_tool = ToolSpec(
        name=SANDBOX_CREATE_TOOL_SPEC["name"],
        description=SANDBOX_CREATE_TOOL_SPEC["description"],
        parameters=SANDBOX_CREATE_TOOL_SPEC["parameters"],
        handler=sandbox_create_handler,
    )

    shared_preamble = (
        "Uses the session's active sandbox. A private cpu-basic sandbox is "
        "started automatically for normal CPU work; call sandbox_create only "
        "for GPU or other non-default hardware.\n\n"
    )
    # Operation tools (auto-execute, no approval needed)
    operation_tools = [
        ToolSpec(
            name=tool_name,
            description=shared_preamble + tool_spec["description"],
            parameters=tool_spec["parameters"],
            handler=_make_tool_handler(tool_name),
        )
        for tool_name, tool_spec in Sandbox.TOOLS.items()
    ]
    return [create_tool, *operation_tools]
|