File size: 28,009 Bytes
a644598 8615c28 a644598 8615c28 a644598 1d50c78 e7524a8 8615c28 b05b6f5 1d50c78 a644598 4fc6e96 a644598 6131fc8 a644598 1d50c78 8615c28 1d50c78 1e1fe1f 1d50c78 8615c28 ae24efc b05b6f5 ae24efc b05b6f5 ae24efc b05b6f5 ae24efc 3c91fc8 ae24efc 23be8d5 ae24efc 23be8d5 ae24efc 3c91fc8 6131fc8 7636865 a644598 1d50c78 8615c28 1d50c78 1e1fe1f 1d50c78 754345f 1d50c78 8615c28 1d50c78 a644598 6131fc8 8615c28 6131fc8 8615c28 6131fc8 a644598 33f29a8 a644598 08153a8 a644598 8615c28 a644598 b05b6f5 9d1a532 e7524a8 8615c28 e7524a8 6131fc8 15d6506 3c91fc8 6131fc8 3c91fc8 e7524a8 3c91fc8 8615c28 9cd534c 2a2e170 754345f 2a2e170 e7524a8 8615c28 b05b6f5 8615c28 754345f 8615c28 a644598 8615c28 7636865 a644598 2a2e170 754345f 2a2e170 754345f 2a2e170 a644598 8615c28 7636865 8615c28 7636865 8615c28 7636865 b05b6f5 7636865 b05b6f5 7636865 754345f 7636865 754345f 7636865 8615c28 a644598 8615c28 15d6506 a644598 9d1a532 6131fc8 11112c6 6131fc8 a644598 8615c28 a644598 6131fc8 11112c6 6131fc8 a644598 6131fc8 a644598 8615c28 6131fc8 3eec386 8615c28 a644598 8615c28 3eec386 8615c28 3eec386 8615c28 a644598 6131fc8 a644598 6131fc8 a644598 6131fc8 a644598 6131fc8 a644598 15d6506 a644598 8615c28 a644598 4fc6e96 a644598 6d4f619 a644598 8615c28 a644598 8615c28 754345f 8615c28 a644598 8615c28 a644598 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 | """
Sandbox tools — expose the Sandbox client as agent tools.
5 tools total:
sandbox_create — create/replace sandbox for non-default hardware
bash, read, write, edit — operations on the active sandbox
A cpu-basic sandbox is preloaded for each session. Operation tools wait for it
if startup is still in progress.
"""
from __future__ import annotations
import asyncio
import logging
import re
import threading
import weakref
from collections.abc import Callable
from datetime import datetime, timedelta, timezone
from typing import Any
from huggingface_hub import HfApi, SpaceHardware
from agent.core.hub_artifacts import wrap_shell_command_with_hub_artifact_bootstrap
from agent.core.session import Event
from agent.tools.sandbox_client import Sandbox
from agent.tools.trackio_seed import ensure_trackio_dashboard
# Module-level logger for sandbox tool diagnostics.
logger = logging.getLogger(__name__)

# Hardware tier of the sandbox that is preloaded for each session and used
# whenever a caller does not request something else.
DEFAULT_CPU_SANDBOX_HARDWARE = "cpu-basic"

# Match the exact suffix pattern Sandbox.create produces: "sandbox-<8 hex>".
# Used to identify orphan sandboxes from prior sessions safely (won't match
# user-renamed lookalikes).
SANDBOX_SPACE_NAME_RE = re.compile(r"^sandbox-[a-f0-9]{8}$")

# How stale a sandbox must be before we treat it as definitely orphan.
# Anything more recent could be tied to a still-live session in another tab,
# so we leave it alone.
_ORPHAN_STALE_AFTER = timedelta(hours=1)

# HF Space duplication/build APIs can behave poorly when multiple private
# sandboxes are created concurrently for the same namespace. Keep session
# creation non-blocking, but serialize the actual Hub create path per owner.
# Layout: event loop (weakly referenced) -> owner name -> asyncio.Lock.
_SANDBOX_CREATE_LOCKS: weakref.WeakKeyDictionary[
    asyncio.AbstractEventLoop, dict[str, asyncio.Lock]
] = weakref.WeakKeyDictionary()
def _get_sandbox_create_lock(owner: str) -> asyncio.Lock:
    """Return the sandbox-creation lock for *owner* on the running event loop.

    Locks are kept per event loop and then per owner; a missing entry at
    either level is created on first use. Must be called while an event loop
    is running, because it looks the loop up with ``get_running_loop``.
    """
    per_loop = _SANDBOX_CREATE_LOCKS.setdefault(asyncio.get_running_loop(), {})
    try:
        return per_loop[owner]
    except KeyError:
        fresh = per_loop[owner] = asyncio.Lock()
        return fresh
def _session_tool_logger(
session: Any, *, tool: str = "sandbox"
) -> Callable[[str], object] | None:
event_queue = getattr(session, "event_queue", None)
if event_queue is None:
return None
loop = asyncio.get_running_loop()
def _log(msg: str) -> None:
loop.call_soon_threadsafe(
event_queue.put_nowait,
Event(event_type="tool_log", data={"tool": tool, "log": msg}),
)
return _log
def _looks_like_path(script: str) -> bool:
"""Return True if the script string looks like a file path (not inline code)."""
if not (
isinstance(script, str)
and script.strip() == script
and not any(c in script for c in "\r\n\0")
):
return False
if script.startswith("http://") or script.startswith("https://"):
return False
return (
script.startswith("/")
or script.startswith("./")
or script.startswith("../")
or (script.endswith(".py") and not any(c.isspace() for c in script))
)
async def resolve_sandbox_script(
    sandbox: Any, script: str
) -> tuple[str | None, str | None]:
    """Read a file from the sandbox if *script* looks like a path.

    Returns:
        (content, error) — content is the file text on success, error is a
        message on failure. Both None means there is no sandbox or *script*
        is not a path (caller should use it as-is).
    """
    if not sandbox or not _looks_like_path(script):
        return None, None

    failure_prefix = f"Failed to read {script} from sandbox: "
    try:
        # Use the read endpoint instead of bash("cat ...") which truncates at 25KB.
        outcome = await asyncio.to_thread(sandbox.read, script, limit=100_000)
        if not (outcome.success and outcome.output):
            return None, failure_prefix + f"{outcome.error}"
        # read returns "N\tcontent" lines; keep only what follows the first tab.
        content_rows = [
            row.partition("\t")[2] if "\t" in row else row
            for row in outcome.output.split("\n")
        ]
        return "\n".join(content_rows), None
    except Exception as exc:
        return None, failure_prefix + f"{exc}"
async def _seed_trackio_dashboard_safe(session: Any, space_id: str) -> None:
"""Idempotently seed *space_id* with trackio dashboard files using the
session's HF token. Logs progress, swallows errors β a failed seed should
not block sandbox creation."""
if not session or not getattr(session, "hf_token", None):
return
loop = asyncio.get_running_loop()
def _log(msg: str) -> None:
loop.call_soon_threadsafe(
session.event_queue.put_nowait,
Event(event_type="tool_log", data={"tool": "sandbox_create", "log": msg}),
)
try:
await asyncio.to_thread(
ensure_trackio_dashboard, space_id, session.hf_token, _log
)
except Exception as e:
_log(f"trackio dashboard seed failed: {e}")
async def _update_persisted_sandbox_fields(session: Any, **fields: Any) -> None:
"""Best-effort update of sandbox metadata on the durable session record."""
store = getattr(session, "persistence_store", None)
session_id = getattr(session, "session_id", None)
if not (store and session_id and hasattr(store, "update_session_fields")):
return
try:
await store.update_session_fields(session_id, **fields)
except Exception as e:
logger.warning("Failed to persist sandbox metadata for %s: %s", session_id, e)
async def _persist_active_sandbox(
session: Any,
sandbox: Sandbox,
*,
hardware: str,
) -> None:
space_id = getattr(sandbox, "space_id", None)
if not space_id:
return
owner = space_id.split("/", 1)[0] if "/" in space_id else None
await _update_persisted_sandbox_fields(
session,
sandbox_space_id=space_id,
sandbox_hardware=hardware,
sandbox_owner=owner,
sandbox_created_at=datetime.now(timezone.utc),
sandbox_status="active",
)
async def _clear_persisted_sandbox(session: Any) -> None:
    """Blank the persisted sandbox metadata and mark the sandbox destroyed."""
    # Every descriptive field is reset to None; only the status keeps a value.
    cleared = dict.fromkeys(
        (
            "sandbox_space_id",
            "sandbox_hardware",
            "sandbox_owner",
            "sandbox_created_at",
        )
    )
    await _update_persisted_sandbox_fields(
        session, **cleared, sandbox_status="destroyed"
    )
# ── Tool name mapping (short agent names → Sandbox client names) ──────
def _cleanup_user_orphan_sandboxes(
api: HfApi,
owner: str,
log: Any,
) -> int:
"""Delete stale ``sandbox-<8hex>`` Spaces in ``owner``'s account.
"Stale" = not modified in the last hour. The naming pattern + staleness
filter together make this safe:
* Naming: only matches ``sandbox-<exactly 8 lowercase hex>``, the
pattern Sandbox.create produces. Won't touch user-renamed Spaces.
* Staleness: anything modified in the last hour might still be tied
to a live session in another tab/replica, so we leave it alone.
Runs blocking β call via ``asyncio.to_thread``. Best-effort: failures
are logged but never raised, so a flaky HF API never blocks creation.
"""
cutoff = datetime.now(timezone.utc) - _ORPHAN_STALE_AFTER
deleted = 0
try:
spaces = list(api.list_spaces(author=owner, limit=200, full=True))
except Exception as e:
log(f"orphan sweep: list_spaces failed: {e}")
return 0
for space in spaces:
space_name = space.id.rsplit("/", 1)[-1]
if not SANDBOX_SPACE_NAME_RE.match(space_name):
continue
last_mod = getattr(space, "lastModified", None) or getattr(
space, "last_modified", None
)
if isinstance(last_mod, str):
try:
last_mod = datetime.fromisoformat(last_mod.replace("Z", "+00:00"))
except ValueError:
last_mod = None
if last_mod is None:
log(f"orphan sweep: skipping {space.id}; missing lastModified")
continue
if last_mod and last_mod > cutoff:
# Recent β could be a concurrent live session. Skip.
continue
try:
api.delete_repo(repo_id=space.id, repo_type="space")
deleted += 1
log(f"orphan sweep: deleted {space.id}")
except Exception as e:
log(f"orphan sweep: failed to delete {space.id}: {e}")
if deleted:
log(f"orphan sweep: cleaned up {deleted} stale sandbox(es) before create")
return deleted
async def _ensure_sandbox(
    session: Any,
    hardware: str = DEFAULT_CPU_SANDBOX_HARDWARE,
    extra_secrets: dict[str, str] | None = None,
    cancel_event: threading.Event | None = None,
    **create_kwargs,
) -> tuple[Sandbox | None, str | None]:
    """Ensure a sandbox exists on the session, creating one if needed.

    An existing ``session.sandbox`` is returned as-is. Otherwise the HF
    username is resolved from the session token and creation is serialized
    through the per-owner creation lock.

    Args:
        session: Session object carrying ``hf_token`` and ``sandbox``.
        hardware: Hardware tier to create when no sandbox exists yet.
        extra_secrets: Additional secrets forwarded to sandbox creation.
        cancel_event: Optional thread event that aborts creation when set.
        **create_kwargs: Forwarded to the sandbox creation call.

    Returns:
        (sandbox, error_message) — one will be None.

    Raises:
        Whatever ``whoami`` or the creation step raises; only the documented
        error cases are converted to an error message.
    """
    if not session:
        return None, "No session available."
    if getattr(session, "sandbox", None):
        return session.sandbox, None

    token = session.hf_token
    if not token:
        return None, "No HF token available. Cannot create sandbox."

    api = HfApi(token=token)
    # whoami() performs a blocking HTTP request; run it in a worker thread so
    # the event loop keeps serving other sessions while it is in flight.
    user_info = await asyncio.to_thread(api.whoami)
    owner = user_info.get("name", user_info.get("user", ""))
    if not owner:
        return None, "Could not determine HF username from token."

    create_lock = _get_sandbox_create_lock(owner)
    if create_lock.locked():
        # Another creation for this owner is running; tell the user we queue.
        await session.send_event(
            Event(
                event_type="tool_log",
                data={
                    "tool": "sandbox",
                    "log": "Waiting for sandbox creation slot...",
                },
            )
        )

    async with create_lock:
        # Re-check: the sandbox may have been attached while we were waiting.
        if getattr(session, "sandbox", None):
            return session.sandbox, None
        return await _create_sandbox_locked(
            session,
            api=api,
            owner=owner,
            hardware=hardware,
            extra_secrets=extra_secrets,
            cancel_event=cancel_event,
            **create_kwargs,
        )
async def _create_sandbox_locked(
    session: Any,
    *,
    api: HfApi,
    owner: str,
    hardware: str,
    extra_secrets: dict[str, str] | None = None,
    cancel_event: threading.Event | None = None,
    **create_kwargs,
) -> tuple[Sandbox | None, str | None]:
    """Create the Space while the per-owner sandbox creation lock is held.

    Runs the blocking ``Sandbox.create`` in a worker thread, then attaches
    the result to the session, persists it, records telemetry and announces
    it with a ``tool_log`` event.

    Args:
        session: Session that will own the sandbox.
        api: Hub client from the caller; not used by this function itself.
        owner: HF namespace to create the Space under.
        hardware: Hardware tier for the Space.
        extra_secrets: Extra secrets; entries with falsy values are dropped.
            ``HF_TOKEN`` is always included.
        cancel_event: Thread event that aborts creation when set. A private
            one is created when omitted.
        **create_kwargs: Forwarded to ``Sandbox.create``; ``private`` is
            always forced to ``True``.

    Returns:
        ``(sandbox, None)`` on success, or ``(None, message)`` when creation
        was cancelled.

    Raises:
        Any exception from ``Sandbox.create`` other than ``Sandbox.Cancelled``.
    """
    import time as _t

    token = session.hf_token
    await session.send_event(
        Event(
            event_type="tool_log",
            data={
                "tool": "sandbox",
                "log": f"Auto-creating sandbox for {owner} ({hardware})...",
            },
        )
    )

    # Thread-safe log callback: posts tool_log events from worker threads.
    _log = _session_tool_logger(session) or (lambda msg: None)

    secrets: dict[str, str] = {"HF_TOKEN": token}
    if extra_secrets:
        secrets.update({k: v for k, v in extra_secrets.items() if v})

    create_kwargs["private"] = True  # enforce: overrides any caller-supplied value
    kwargs = {
        "owner": owner,
        "hardware": hardware,
        "token": token,
        "secrets": secrets,
        "log": _log,
        **create_kwargs,
    }
    if hardware != DEFAULT_CPU_SANDBOX_HARDWARE:
        # NOTE(review): units are not visible here; presumably seconds
        # (45 minutes) of idle time before the Space sleeps — confirm.
        kwargs["sleep_time"] = 2700

    # Bridge the session's asyncio cancel signal to a threading.Event that is
    # handed to the blocking create call as ``cancel_event``.
    cancel_flag = cancel_event or threading.Event()
    # Original key order: "cancel_event" comes before the caller's extras.
    kwargs = {
        **{k: kwargs[k] for k in ("owner", "hardware", "token", "secrets", "log")},
        "cancel_event": cancel_flag,
        **{
            k: v
            for k, v in kwargs.items()
            if k not in ("owner", "hardware", "token", "secrets", "log")
        },
    }

    async def _watch_cancel():
        await session._cancelled.wait()
        cancel_flag.set()

    _t_start = _t.monotonic()
    # Start the watcher right before the try so ``finally`` always cancels it.
    watcher_task = asyncio.create_task(_watch_cancel())
    try:
        sb = await asyncio.to_thread(Sandbox.create, **kwargs)
    except Sandbox.Cancelled:
        return None, "Sandbox creation cancelled by user."
    except asyncio.CancelledError:
        # The awaiting task was cancelled, but the worker thread running
        # Sandbox.create keeps going. Signal it so it can stop instead of
        # finishing a Space that nothing will ever attach to.
        cancel_flag.set()
        raise
    finally:
        watcher_task.cancel()

    if cancel_flag.is_set():
        # Cancellation arrived after create returned: discard the new Space.
        if getattr(sb, "_owns_space", False):
            try:
                await asyncio.to_thread(sb.delete, log=_log)
            except Exception as e:
                logger.warning(
                    "Failed to delete cancelled sandbox %s: %s", sb.space_id, e
                )
        return None, "Sandbox creation cancelled by user."

    session.sandbox = sb
    session.sandbox_hardware = hardware
    session.sandbox_preload_error = None
    await _persist_active_sandbox(session, sb, hardware=hardware)

    # Telemetry: sandbox creation (infra consumption signal)
    from agent.core import telemetry

    await telemetry.record_sandbox_create(
        session,
        sb,
        hardware=hardware,
        create_latency_s=int(_t.monotonic() - _t_start),
    )

    await session.send_event(
        Event(
            event_type="tool_log",
            data={"tool": "sandbox", "log": f"Sandbox ready: {sb.space_id} ({sb.url})"},
        )
    )
    return sb, None
def start_cpu_sandbox_preload(session: Any) -> asyncio.Task | None:
    """Start a background ``cpu-basic`` sandbox for this session.

    Returns ``None`` when there is no session or it already has a sandbox.
    Returns the existing preload task when one is still running. Otherwise
    resets the session's preload cancel event and error, schedules a new
    task and stores it on ``session.sandbox_preload_task``.
    """
    if not session:
        return None
    if getattr(session, "sandbox", None):
        return None

    in_flight = getattr(session, "sandbox_preload_task", None)
    if in_flight and not in_flight.done():
        return in_flight

    stop_signal = threading.Event()
    session.sandbox_preload_cancel_event = stop_signal
    session.sandbox_preload_error = None

    async def _preload() -> Sandbox | None:
        try:
            sb, error = await _ensure_sandbox(
                session,
                hardware=DEFAULT_CPU_SANDBOX_HARDWARE,
                cancel_event=stop_signal,
            )
            if not error:
                return sb
            session.sandbox_preload_error = error
            return None
        except asyncio.CancelledError:
            # Tell the worker thread to stop too, then let cancellation through.
            stop_signal.set()
            session.sandbox_preload_error = "Sandbox creation cancelled by user."
            raise
        except Exception as exc:
            session.sandbox_preload_error = f"Failed to create sandbox: {exc}"
            logger.warning("CPU sandbox preload failed: %s", exc)
            return None

    preload_task = asyncio.create_task(_preload())
    session.sandbox_preload_task = preload_task
    return preload_task
async def cancel_sandbox_preload(session: Any) -> None:
    """Best-effort cancellation for an in-flight CPU sandbox preload.

    Sets the session's preload cancel event (if any), then waits up to 30
    seconds for the preload task to finish on its own. If it does not, the
    asyncio task is cancelled. Errors raised by the preload task are logged
    at debug level and not propagated; cancellation of the caller is.
    """
    cancel_event = getattr(session, "sandbox_preload_cancel_event", None)
    if cancel_event is not None:
        cancel_event.set()

    task = getattr(session, "sandbox_preload_task", None)
    if not task or task.done():
        return
    if task is asyncio.current_task():
        # Called from inside the preload itself; waiting would never finish.
        return

    try:
        # shield() keeps the wait_for timeout from cancelling the preload
        # directly; the explicit task.cancel() below decides that.
        await asyncio.wait_for(asyncio.shield(task), timeout=30)
    except asyncio.TimeoutError:
        logger.warning(
            "Timed out waiting for CPU sandbox preload cancellation; "
            "task is still live, cancelling asyncio wrapper"
        )
        task.cancel()
    except asyncio.CancelledError:
        raise
    except Exception as e:
        # Still best-effort, but leave a trace instead of dropping the error.
        logger.debug("CPU sandbox preload failed while being cancelled: %s", e)
async def get_active_or_preloaded_sandbox(
    session: Any,
) -> tuple[Sandbox | None, str | None]:
    """Return the active sandbox, waiting for the startup preload if needed.

    Returns ``(sandbox, None)`` when the session has a sandbox, either
    already or once the preload task finishes. Otherwise returns
    ``(None, message)``, where the message is the recorded preload error or
    a generic "still starting" notice.
    """
    if not session:
        return None, "No session available."

    pending = getattr(session, "sandbox_preload_task", None)
    if not getattr(session, "sandbox", None) and pending:
        try:
            # shield: cancelling this waiter must not cancel the shared preload.
            await asyncio.shield(pending)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            session.sandbox_preload_error = f"Failed to create sandbox: {exc}"

    if getattr(session, "sandbox", None):
        return session.sandbox, None

    recorded_error = getattr(session, "sandbox_preload_error", None)
    if not recorded_error:
        return None, "Sandbox is still starting. Please retry shortly."
    return None, recorded_error
async def teardown_session_sandbox(session: Any) -> None:
    """Cancel sandbox preload and delete the active owned sandbox, if present.

    The sandbox is detached from the session first. If it owns its Space,
    deletion is attempted up to three times with 1s and 2s pauses between
    attempts. Whenever a sandbox was attached, its persisted metadata is
    cleared afterwards, even if deletion failed or the Space is not owned.
    Deletion failures are logged, never raised.
    """
    if not session:
        return

    await cancel_sandbox_preload(session)

    sandbox = getattr(session, "sandbox", None)
    session.sandbox = None
    session.sandbox_hardware = None
    if not sandbox:
        return

    try:
        if not getattr(sandbox, "_owns_space", False):
            return

        space_id = getattr(sandbox, "space_id", None)
        delete_log = _session_tool_logger(session)
        last_err: Exception | None = None
        for attempt in range(3):
            try:
                logger.info(
                    "Deleting sandbox %s (attempt %s/3)...",
                    space_id,
                    attempt + 1,
                )
                await asyncio.to_thread(sandbox.delete, log=delete_log)
            except Exception as e:
                last_err = e
                if attempt < 2:
                    await asyncio.sleep(2**attempt)
                continue

            # The Space is gone. Telemetry is kept outside the retry guard so
            # a telemetry failure is not mistaken for a failed delete, which
            # would re-run delete against an already-deleted Space.
            try:
                from agent.core import telemetry

                await telemetry.record_sandbox_destroy(session, sandbox)
            except Exception as e:
                logger.warning(
                    "Failed to record sandbox destroy telemetry for %s: %s",
                    space_id,
                    e,
                )
            return

        logger.error(
            "Failed to delete sandbox %s after 3 attempts: %s. "
            "Orphan — sweep script will pick it up.",
            space_id,
            last_err,
        )
    finally:
        await _clear_persisted_sandbox(session)
# ── sandbox_create tool ──────────────────────────────────────────────
# Tool specification for ``sandbox_create``: name, model-facing description
# and JSON-schema parameters. The hardware enum and the hardware list in the
# description are both generated from ``SpaceHardware``.
SANDBOX_CREATE_TOOL_SPEC = {
    "name": "sandbox_create",
    "description": (
        "Create or replace the session sandbox when non-default hardware is needed.\n\n"
        "A private cpu-basic sandbox is already started automatically for each session. "
        "For normal CPU code execution, call bash/read/write/edit directly; do NOT call sandbox_create first.\n\n"
        "Use sandbox_create when: you need GPU hardware, cpu-upgrade, or Trackio secrets before running code. "
        "The active sandbox persists across tool calls within the session. pip install works out of the box. "
        "Sandboxes are always created as private HF Spaces.\n\n"
        "For ML code that uses CUDA, bf16, or model loading: use GPU hardware (t4-small minimum). "
        "CPU sandboxes cannot run GPU code paths — your test will not catch GPU-related errors.\n\n"
        "Before choosing hardware, estimate your VRAM needs (models you run, training data size). Rule of thumb: bf16/fp16 ≈ 2 bytes/param, "
        "fp32 ≈ 4 bytes/param, plus ~20% overhead for optimizer states during training.\n"
        "Common picks: t4-small (16GB VRAM, fits ≤1-3B), a10g-small (24GB, ≤7B), a100-large (80GB, ≤30B). "
        "If the model won't fit, pick larger hardware upfront — OOM on a sandbox wastes time.\n\n"
        "If you intend to run a training script in this sandbox that uses report_to='trackio', "
        "pass `trackio_space_id` (e.g. '<username>/ml-intern-<8char>') and `trackio_project` so they "
        "are set as TRACKIO_SPACE_ID/TRACKIO_PROJECT secrets in the sandbox and the UI can embed the live dashboard.\n\n"
        "Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n"
    ),
    "parameters": {
        "type": "object",
        "required": [],
        "additionalProperties": False,
        "properties": {
            "hardware": {
                "type": "string",
                "enum": [e.value for e in SpaceHardware],
                "description": (
                    "Hardware tier for the sandbox. Omit for the existing auto-started "
                    "cpu-basic sandbox; choose GPU/cpu-upgrade only when needed."
                ),
            },
            "trackio_space_id": {
                "type": "string",
                "description": (
                    "Optional. The HF Space hosting the trackio dashboard for runs in this sandbox "
                    "(e.g. '<username>/ml-intern-<8char>', under YOUR HF namespace). Injected as "
                    "TRACKIO_SPACE_ID secret and surfaced to the UI. The Space is auto-created and "
                    "seeded with the trackio dashboard — DO NOT pre-create it via hf_repo_git, "
                    "that produces an empty Space that breaks the embed."
                ),
            },
            "trackio_project": {
                "type": "string",
                "description": (
                    "Optional. The trackio project name. Injected as TRACKIO_PROJECT secret and "
                    "used by the UI to filter the embedded dashboard to this project."
                ),
            },
        },
    },
}
async def sandbox_create_handler(
    args: dict[str, Any], session: Any = None, tool_call_id: str | None = None
) -> tuple[str, bool]:
    """Handle sandbox_create tool calls.

    Behaviour, in order:

    * Default hardware requested while the CPU preload is still running:
      wait for the preload and report it as already active.
    * Non-default hardware requested while the preload is running: cancel
      the preload first.
    * A sandbox is already attached: report it, unless it is the automatic
      cpu-basic sandbox and different hardware was requested, in which case
      it is torn down and replaced. Any other hardware change is refused
      with an explanatory note.
    * Otherwise create a sandbox, seeding the trackio dashboard Space and
      passing the trackio secrets when they were supplied.

    Args:
        args: Tool arguments (``hardware``, ``trackio_space_id``,
            ``trackio_project``); all optional.
        session: Active session, or ``None``.
        tool_call_id: Id used to tag the trackio ``tool_state_change`` event.

    Returns:
        ``(message, success)``.
    """
    requested_hardware = args.get("hardware")
    # A null or empty "hardware" means "not specified": use the default tier
    # rather than forwarding None/"" to sandbox creation.
    hardware = requested_hardware or DEFAULT_CPU_SANDBOX_HARDWARE
    trackio_space_id = args.get("trackio_space_id") or None
    trackio_project = args.get("trackio_project") or None

    async def _emit_trackio_state() -> None:
        """Tell the frontend which trackio dashboard to embed for this sandbox."""
        if not (session and tool_call_id and trackio_space_id):
            return
        data: dict[str, Any] = {
            "tool_call_id": tool_call_id,
            "tool": "sandbox_create",
            "state": "running",
            "trackioSpaceId": trackio_space_id,
        }
        if trackio_project:
            data["trackioProject"] = trackio_project
        await session.send_event(Event(event_type="tool_state_change", data=data))

    def _already_active(sb: Any, detail: str) -> str:
        """Format the reply for a sandbox that already exists."""
        return (
            f"Sandbox already active: {sb.space_id}\n"
            f"URL: {sb.url}\n"
            f"{detail}\n"
            f"Use bash/read/write/edit to interact with it."
        )

    preload_task = getattr(session, "sandbox_preload_task", None)
    preload_in_flight = bool(
        session
        and not getattr(session, "sandbox", None)
        and preload_task
        and not preload_task.done()
    )
    if preload_in_flight:
        if hardware == DEFAULT_CPU_SANDBOX_HARDWARE:
            # The preload is already building what was asked for; wait for it.
            sb, error = await get_active_or_preloaded_sandbox(session)
            if error:
                return error, False
            if sb:
                await _emit_trackio_state()
                return (
                    _already_active(sb, f"Hardware: {DEFAULT_CPU_SANDBOX_HARDWARE}"),
                    True,
                )
        else:
            # Different hardware wanted: stop the CPU preload before creating.
            await cancel_sandbox_preload(session)

    # If sandbox already exists, return its info or replace the auto CPU sandbox
    if session and getattr(session, "sandbox", None):
        sb = session.sandbox
        active_hardware = getattr(session, "sandbox_hardware", None)
        if active_hardware == hardware:
            await _emit_trackio_state()
            return _already_active(sb, f"Hardware: {active_hardware}"), True

        if (
            active_hardware == DEFAULT_CPU_SANDBOX_HARDWARE
            and hardware != DEFAULT_CPU_SANDBOX_HARDWARE
        ):
            # Upgrade from the automatic CPU sandbox: drop it, then fall
            # through to creation below.
            await teardown_session_sandbox(session)
        elif requested_hardware:
            lockout_note = (
                f"\nRequested hardware: {requested_hardware}\n"
                "Hardware cannot be changed by calling sandbox_create again. "
                "Delete the existing sandbox first if you need a different tier."
            )
            await _emit_trackio_state()
            return _already_active(sb, lockout_note), True
        else:
            await _emit_trackio_state()
            return (
                _already_active(sb, f"Hardware: {active_hardware or 'unknown'}"),
                True,
            )

    extra_secrets: dict[str, str] = {}
    if trackio_space_id:
        extra_secrets["TRACKIO_SPACE_ID"] = trackio_space_id
        await _seed_trackio_dashboard_safe(session, trackio_space_id)
    if trackio_project:
        extra_secrets["TRACKIO_PROJECT"] = trackio_project

    try:
        sb, error = await _ensure_sandbox(
            session,
            hardware=hardware,
            extra_secrets=extra_secrets or None,
        )
    except Exception as e:
        return f"Failed to create sandbox: {e}", False

    if error:
        return error, False

    await _emit_trackio_state()
    return (
        f"Sandbox created: {sb.space_id}\n"
        f"URL: {sb.url}\n"
        f"Hardware: {hardware}\n"
        "Visibility: private\n"
        f"Use bash/read/write/edit to interact with it."
    ), True
def _make_tool_handler(sandbox_tool_name: str):
"""Factory: create a handler for a sandbox operation tool."""
async def handler(args: dict[str, Any], session: Any = None) -> tuple[str, bool]:
sb, error = await get_active_or_preloaded_sandbox(session)
if error:
return error, False
if not sb:
return "Sandbox is still starting. Please retry shortly.", False
try:
if sandbox_tool_name == "bash" and args.get("command"):
args = {
**args,
"command": wrap_shell_command_with_hub_artifact_bootstrap(
args["command"],
session,
),
}
result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
if result.success:
output = result.output or "(no output)"
return output, True
else:
error_msg = result.error or "Unknown error"
output = result.output
if output:
return f"{output}\n\nERROR: {error_msg}", False
return f"ERROR: {error_msg}", False
except Exception as e:
return f"Sandbox operation failed: {e}", False
return handler
def get_sandbox_tools():
    """Return the sandbox ToolSpecs: ``sandbox_create`` plus the operation tools.

    One operation tool is emitted per entry in ``Sandbox.TOOLS``, in that
    mapping's order, each with a shared preamble prepended to its own
    description.
    """
    from agent.core.tools import ToolSpec

    shared_preamble = (
        "Uses the session's active sandbox. A private cpu-basic sandbox is "
        "started automatically for normal CPU work; call sandbox_create only "
        "for GPU or other non-default hardware.\n\n"
    )

    # sandbox_create (for GPU or other non-default hardware)
    create_tool = ToolSpec(
        name=SANDBOX_CREATE_TOOL_SPEC["name"],
        description=SANDBOX_CREATE_TOOL_SPEC["description"],
        parameters=SANDBOX_CREATE_TOOL_SPEC["parameters"],
        handler=sandbox_create_handler,
    )

    # Operation tools (auto-execute, no approval needed)
    operation_tools = [
        ToolSpec(
            name=name,
            description=shared_preamble + spec["description"],
            parameters=spec["parameters"],
            handler=_make_tool_handler(name),
        )
        for name, spec in Sandbox.TOOLS.items()
    ]
    return [create_tool, *operation_tools]
|