lewtun HF Staff Codex committed on
Commit
15d6506
·
unverified ·
1 Parent(s): 5db99fa

Make sandboxes private (#195)

Browse files

* Make sandbox Spaces private

Co-authored-by: Codex <codex@openai.com>

* Remove legacy sandbox auth fallback

Co-authored-by: Codex <codex@openai.com>

* Address sandbox privacy review comments

Co-authored-by: Codex <codex@openai.com>

---------

Co-authored-by: Codex <codex@openai.com>

agent/tools/sandbox_client.py CHANGED
@@ -13,7 +13,7 @@ Architecture:
13
  - Optionally deletes the Space when done
14
 
15
  Lifecycle:
16
- sb = Sandbox.create(owner="burtenshaw") # duplicate, wait, connect
17
  sb = Sandbox.create(owner="burtenshaw", # with options
18
  hardware="t4-small",
19
  private=True,
@@ -157,18 +157,20 @@ def _atomic_write(path: pathlib.Path, content: str):
157
 
158
  app = FastAPI()
159
 
160
- def _expected_api_token() -> str:
161
- return os.environ.get("SANDBOX_API_TOKEN") or os.environ.get("HF_TOKEN") or ""
 
 
 
162
 
163
  def _require_auth(request: Request) -> None:
164
- expected = _expected_api_token()
165
- if not expected:
166
  raise HTTPException(status_code=503, detail="Sandbox API token not configured")
167
- auth_header = request.headers.get("authorization", "")
168
- scheme, _, supplied = auth_header.partition(" ")
169
- if scheme.lower() != "bearer" or not supplied:
170
  raise HTTPException(status_code=401, detail="Missing bearer token")
171
- if not hmac.compare_digest(supplied, expected):
172
  raise HTTPException(status_code=401, detail="Invalid bearer token")
173
 
174
  _AUTH = [Depends(_require_auth)]
@@ -513,15 +515,28 @@ class Sandbox:
513
  # Trailing slash is critical: httpx resolves relative paths against base_url.
514
  # Without it, client.get("health") resolves to /health instead of /api/health.
515
  self._base_url = f"https://{slug}.hf.space/api/"
516
- api_token = self.api_token or self.token
517
  self._client = httpx.Client(
518
  base_url=self._base_url,
519
- headers={"Authorization": f"Bearer {api_token}"} if api_token else {},
520
  timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
521
  follow_redirects=True,
522
  )
523
  self._hf_api = HfApi(token=self.token)
524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  # ── Lifecycle ─────────────────────────────────────────────────
526
 
527
  class Cancelled(Exception):
@@ -535,7 +550,7 @@ class Sandbox:
535
  name: str | None = None,
536
  template: str = TEMPLATE_SPACE,
537
  hardware: str = "cpu-basic",
538
- private: bool = False,
539
  sleep_time: int | None = None,
540
  token: str | None = None,
541
  secrets: dict[str, str] | None = None,
@@ -555,7 +570,7 @@ class Sandbox:
555
  A unique suffix is always appended.
556
  template: Source Space to duplicate (default: burtenshaw/sandbox).
557
  hardware: Hardware tier (cpu-basic, t4-small, etc.).
558
- private: Whether the Space should be private.
559
  sleep_time: Auto-sleep after N seconds of inactivity.
560
  token: HF API token (from user's OAuth session).
561
  wait_timeout: Max seconds to wait for Space to start (default: 300).
 
13
  - Optionally deletes the Space when done
14
 
15
  Lifecycle:
16
+ sb = Sandbox.create(owner="burtenshaw") # duplicate private Space, wait, connect
17
  sb = Sandbox.create(owner="burtenshaw", # with options
18
  hardware="t4-small",
19
  private=True,
 
157
 
158
  app = FastAPI()
159
 
160
+ def _bearer_token(header: str) -> str:
161
+ scheme, _, supplied = header.partition(" ")
162
+ if scheme.lower() != "bearer" or not supplied:
163
+ return ""
164
+ return supplied
165
 
166
  def _require_auth(request: Request) -> None:
167
+ sandbox_token = os.environ.get("SANDBOX_API_TOKEN") or ""
168
+ if not sandbox_token:
169
  raise HTTPException(status_code=503, detail="Sandbox API token not configured")
170
+ supplied = _bearer_token(request.headers.get("x-sandbox-authorization", ""))
171
+ if not supplied:
 
172
  raise HTTPException(status_code=401, detail="Missing bearer token")
173
+ if not hmac.compare_digest(supplied, sandbox_token):
174
  raise HTTPException(status_code=401, detail="Invalid bearer token")
175
 
176
  _AUTH = [Depends(_require_auth)]
 
515
  # Trailing slash is critical: httpx resolves relative paths against base_url.
516
  # Without it, client.get("health") resolves to /health instead of /api/health.
517
  self._base_url = f"https://{slug}.hf.space/api/"
 
518
  self._client = httpx.Client(
519
  base_url=self._base_url,
520
+ headers=self._auth_headers(),
521
  timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
522
  follow_redirects=True,
523
  )
524
  self._hf_api = HfApi(token=self.token)
525
 
526
+ def _auth_headers(self) -> dict[str, str]:
527
+ """Return headers for private HF Space access plus sandbox API auth.
528
+
529
+ Private Spaces require the HF token in ``Authorization`` at the Hub
530
+ edge. The sandbox server requires its control-plane token in the
531
+ dedicated ``X-Sandbox-Authorization`` header.
532
+ """
533
+ headers: dict[str, str] = {}
534
+ if self.token:
535
+ headers["Authorization"] = f"Bearer {self.token}"
536
+ if self.api_token:
537
+ headers["X-Sandbox-Authorization"] = f"Bearer {self.api_token}"
538
+ return headers
539
+
540
  # ── Lifecycle ─────────────────────────────────────────────────
541
 
542
  class Cancelled(Exception):
 
550
  name: str | None = None,
551
  template: str = TEMPLATE_SPACE,
552
  hardware: str = "cpu-basic",
553
+ private: bool = True,
554
  sleep_time: int | None = None,
555
  token: str | None = None,
556
  secrets: dict[str, str] | None = None,
 
570
  A unique suffix is always appended.
571
  template: Source Space to duplicate (default: burtenshaw/sandbox).
572
  hardware: Hardware tier (cpu-basic, t4-small, etc.).
573
+ private: Whether the Space should be private. Defaults to True.
574
  sleep_time: Auto-sleep after N seconds of inactivity.
575
  token: HF API token (from user's OAuth session).
576
  wait_timeout: Max seconds to wait for Space to start (default: 300).
agent/tools/sandbox_tool.py CHANGED
@@ -235,6 +235,7 @@ async def _ensure_sandbox(
235
  if extra_secrets:
236
  secrets.update({k: v for k, v in extra_secrets.items() if v})
237
 
 
238
  kwargs = {
239
  "owner": owner,
240
  "hardware": hardware,
@@ -292,7 +293,8 @@ SANDBOX_CREATE_TOOL_SPEC = {
292
  "description": (
293
  "Create a persistent remote Linux environment for developing and testing scripts.\n\n"
294
  "Workflow: sandbox_create → write script → pip install → test with small run → fix errors → hf_jobs at scale.\n"
295
- "The sandbox persists across tool calls within the session. pip install works out of the box.\n\n"
 
296
  "Use this when: you need to develop, test, and iterate on scripts before launching via hf_jobs. "
297
  "Especially for training scripts where you need to verify imports, test on a small subset, and fix errors interactively.\n\n"
298
  "Skip this when: the task is a simple one-shot operation (status check, resource search, quick data query), "
@@ -318,10 +320,6 @@ SANDBOX_CREATE_TOOL_SPEC = {
318
  "enum": [e.value for e in SpaceHardware],
319
  "description": "Hardware tier for the sandbox (default: cpu-basic)",
320
  },
321
- "private": {
322
- "type": "boolean",
323
- "description": "If true, create a private Space",
324
- },
325
  "trackio_space_id": {
326
  "type": "string",
327
  "description": (
@@ -386,8 +384,6 @@ async def sandbox_create_handler(
386
  ), True
387
 
388
  create_kwargs: dict[str, Any] = {}
389
- if "private" in args:
390
- create_kwargs["private"] = args["private"]
391
 
392
  extra_secrets: dict[str, str] = {}
393
  if trackio_space_id:
@@ -415,6 +411,7 @@ async def sandbox_create_handler(
415
  f"Sandbox created: {sb.space_id}\n"
416
  f"URL: {sb.url}\n"
417
  f"Hardware: {hardware}\n"
 
418
  f"Use bash/read/write/edit to interact with it."
419
  ), True
420
 
 
235
  if extra_secrets:
236
  secrets.update({k: v for k, v in extra_secrets.items() if v})
237
 
238
+ create_kwargs["private"] = True # enforce: overrides any caller-supplied value
239
  kwargs = {
240
  "owner": owner,
241
  "hardware": hardware,
 
293
  "description": (
294
  "Create a persistent remote Linux environment for developing and testing scripts.\n\n"
295
  "Workflow: sandbox_create → write script → pip install → test with small run → fix errors → hf_jobs at scale.\n"
296
+ "The sandbox persists across tool calls within the session. pip install works out of the box. "
297
+ "Sandboxes are always created as private HF Spaces.\n\n"
298
  "Use this when: you need to develop, test, and iterate on scripts before launching via hf_jobs. "
299
  "Especially for training scripts where you need to verify imports, test on a small subset, and fix errors interactively.\n\n"
300
  "Skip this when: the task is a simple one-shot operation (status check, resource search, quick data query), "
 
320
  "enum": [e.value for e in SpaceHardware],
321
  "description": "Hardware tier for the sandbox (default: cpu-basic)",
322
  },
 
 
 
 
323
  "trackio_space_id": {
324
  "type": "string",
325
  "description": (
 
384
  ), True
385
 
386
  create_kwargs: dict[str, Any] = {}
 
 
387
 
388
  extra_secrets: dict[str, str] = {}
389
  if trackio_space_id:
 
411
  f"Sandbox created: {sb.space_id}\n"
412
  f"URL: {sb.url}\n"
413
  f"Hardware: {hardware}\n"
414
+ "Visibility: private\n"
415
  f"Use bash/read/write/edit to interact with it."
416
  ), True
417
 
frontend/src/components/Chat/ToolCallGroup.tsx CHANGED
@@ -536,9 +536,7 @@ function InlineApproval({
536
  {' '}({cost})
537
  </Box>
538
  )}
539
- {!!args.private && (
540
- <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
541
- )}
542
  </Typography>
543
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem', opacity: 0.7 }}>
544
  Creates a temporary HF Space to develop and test scripts before running jobs. Takes 1-2 min to start.
 
536
  {' '}({cost})
537
  </Box>
538
  )}
539
+ <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
 
 
540
  </Typography>
541
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem', opacity: 0.7 }}>
542
  Creates a temporary HF Space to develop and test scripts before running jobs. Takes 1-2 min to start.
tests/integration/test_live_sandbox_auth.py CHANGED
@@ -1,7 +1,8 @@
1
  """Opt-in live sandbox communication test.
2
 
3
- This test creates a real Hugging Face Space sandbox, verifies that unauthenticated
4
- requests are rejected, then exercises the authenticated agent client end-to-end.
 
5
  It is skipped unless ``ML_INTERN_LIVE_SANDBOX_TESTS=1`` and ``HF_TOKEN`` are set.
6
  """
7
 
@@ -41,7 +42,7 @@ def test_live_sandbox_authenticated_agent_communication():
41
  owner=owner,
42
  name="ml-intern-live-auth",
43
  hardware="cpu-basic",
44
- private=False,
45
  token=token,
46
  secrets={"HF_TOKEN": token},
47
  wait_timeout=900,
@@ -54,7 +55,7 @@ def test_live_sandbox_authenticated_agent_communication():
54
  )
55
  try:
56
  denied = unauthenticated.post("exists", json={"path": "/tmp"})
57
- assert denied.status_code == 401
58
  finally:
59
  unauthenticated.close()
60
 
 
1
  """Opt-in live sandbox communication test.
2
 
3
+ This test creates a real private Hugging Face Space sandbox, verifies that
4
+ unauthenticated requests are rejected, then exercises the authenticated agent
5
+ client end-to-end.
6
  It is skipped unless ``ML_INTERN_LIVE_SANDBOX_TESTS=1`` and ``HF_TOKEN`` are set.
7
  """
8
 
 
42
  owner=owner,
43
  name="ml-intern-live-auth",
44
  hardware="cpu-basic",
45
+ private=True,
46
  token=token,
47
  secrets={"HF_TOKEN": token},
48
  wait_timeout=900,
 
55
  )
56
  try:
57
  denied = unauthenticated.post("exists", json={"path": "/tmp"})
58
+ assert denied.status_code in {401, 403, 404} # HF private-Space edge may 404 to avoid leaking existence
59
  finally:
60
  unauthenticated.close()
61
 
tests/unit/test_sandbox_api_auth.py CHANGED
@@ -37,7 +37,7 @@ def test_file_and_command_routes_require_bearer_token(monkeypatch):
37
  assert response.status_code == 401
38
 
39
 
40
- def test_file_and_command_routes_accept_valid_bearer_token(monkeypatch):
41
  client = TestClient(_sandbox_app(monkeypatch, "sandbox-secret"))
42
 
43
  response = client.post(
@@ -46,11 +46,42 @@ def test_file_and_command_routes_accept_valid_bearer_token(monkeypatch):
46
  headers={"Authorization": "Bearer sandbox-secret"},
47
  )
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  assert response.status_code == 200
50
  assert response.json()["success"] is True
51
 
52
 
53
- def test_legacy_hf_token_fallback_is_accepted(monkeypatch):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  client = TestClient(_sandbox_app(monkeypatch, token=None, hf_token="hf-secret"))
55
 
56
  response = client.post(
@@ -59,8 +90,7 @@ def test_legacy_hf_token_fallback_is_accepted(monkeypatch):
59
  headers={"Authorization": "Bearer hf-secret"},
60
  )
61
 
62
- assert response.status_code == 200
63
- assert response.json()["success"] is True
64
 
65
 
66
  def test_protected_routes_fail_closed_without_configured_token(monkeypatch):
@@ -75,10 +105,11 @@ def test_protected_routes_fail_closed_without_configured_token(monkeypatch):
75
  assert response.status_code == 503
76
 
77
 
78
- def test_sandbox_prefers_control_plane_token_for_api_headers():
79
  sandbox = Sandbox("owner/name", token="hf-token", api_token="sandbox-secret")
80
 
81
- assert sandbox._client.headers["authorization"] == "Bearer sandbox-secret"
 
82
 
83
 
84
  def test_sandbox_api_token_is_hidden_from_repr():
 
37
  assert response.status_code == 401
38
 
39
 
40
+ def test_file_and_command_routes_reject_authorization_bearer_token(monkeypatch):
41
  client = TestClient(_sandbox_app(monkeypatch, "sandbox-secret"))
42
 
43
  response = client.post(
 
46
  headers={"Authorization": "Bearer sandbox-secret"},
47
  )
48
 
49
+ assert response.status_code == 401
50
+
51
+
52
+ def test_file_and_command_routes_accept_sandbox_header_with_hf_bearer(monkeypatch):
53
+ client = TestClient(
54
+ _sandbox_app(monkeypatch, "sandbox-secret", hf_token="hf-secret")
55
+ )
56
+
57
+ response = client.post(
58
+ "/api/exists",
59
+ json={"path": "/tmp"},
60
+ headers={
61
+ "Authorization": "Bearer hf-secret",
62
+ "X-Sandbox-Authorization": "Bearer sandbox-secret",
63
+ },
64
+ )
65
+
66
  assert response.status_code == 200
67
  assert response.json()["success"] is True
68
 
69
 
70
+ def test_hf_bearer_alone_is_rejected_when_sandbox_token_is_configured(monkeypatch):
71
+ client = TestClient(
72
+ _sandbox_app(monkeypatch, "sandbox-secret", hf_token="hf-secret")
73
+ )
74
+
75
+ response = client.post(
76
+ "/api/exists",
77
+ json={"path": "/tmp"},
78
+ headers={"Authorization": "Bearer hf-secret"},
79
+ )
80
+
81
+ assert response.status_code == 401
82
+
83
+
84
+ def test_legacy_hf_token_fallback_is_rejected(monkeypatch):
85
  client = TestClient(_sandbox_app(monkeypatch, token=None, hf_token="hf-secret"))
86
 
87
  response = client.post(
 
90
  headers={"Authorization": "Bearer hf-secret"},
91
  )
92
 
93
+ assert response.status_code == 503
 
94
 
95
 
96
  def test_protected_routes_fail_closed_without_configured_token(monkeypatch):
 
105
  assert response.status_code == 503
106
 
107
 
108
+ def test_sandbox_sends_hub_auth_and_control_plane_header():
109
  sandbox = Sandbox("owner/name", token="hf-token", api_token="sandbox-secret")
110
 
111
+ assert sandbox._client.headers["authorization"] == "Bearer hf-token"
112
+ assert sandbox._client.headers["x-sandbox-authorization"] == "Bearer sandbox-secret"
113
 
114
 
115
  def test_sandbox_api_token_is_hidden_from_repr():
tests/unit/test_sandbox_private_spaces.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from types import SimpleNamespace
3
+
4
+ from agent.core import telemetry
5
+ from agent.tools import sandbox_client, sandbox_tool
6
+ from agent.tools.sandbox_client import Sandbox
7
+ from agent.tools.sandbox_tool import sandbox_create_handler
8
+
9
+
10
+ def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
11
+ duplicate_kwargs = {}
12
+
13
+ class FakeApi:
14
+ def __init__(self, token=None):
15
+ self.token = token
16
+
17
+ def duplicate_space(self, **kwargs):
18
+ duplicate_kwargs.update(kwargs)
19
+
20
+ def add_space_secret(self, *args, **kwargs):
21
+ pass
22
+
23
+ def get_space_runtime(self, space_id):
24
+ return SimpleNamespace(stage="RUNNING", hardware="cpu-basic")
25
+
26
+ monkeypatch.setattr(sandbox_client, "HfApi", FakeApi)
27
+ monkeypatch.setattr(
28
+ Sandbox,
29
+ "_setup_server",
30
+ staticmethod(lambda *args, **kwargs: None),
31
+ )
32
+ monkeypatch.setattr(Sandbox, "_wait_for_api", lambda self, *args, **kwargs: None)
33
+
34
+ Sandbox.create(owner="alice", token="hf-token", log=lambda msg: None)
35
+
36
+ assert duplicate_kwargs["private"] is True
37
+
38
+
39
+ def test_sandbox_tool_forces_private_spaces(monkeypatch):
40
+ captured_kwargs = {}
41
+
42
+ async def fake_ensure_sandbox(
43
+ session,
44
+ hardware="cpu-basic",
45
+ extra_secrets=None,
46
+ **create_kwargs,
47
+ ):
48
+ captured_kwargs.update(create_kwargs)
49
+ return (
50
+ SimpleNamespace(
51
+ space_id="alice/sandbox-12345678",
52
+ url="https://huggingface.co/spaces/alice/sandbox-12345678",
53
+ ),
54
+ None,
55
+ )
56
+
57
+ monkeypatch.setattr(sandbox_tool, "_ensure_sandbox", fake_ensure_sandbox)
58
+
59
+ out, ok = asyncio.run(
60
+ sandbox_create_handler(
61
+ {"private": False},
62
+ session=SimpleNamespace(sandbox=None),
63
+ )
64
+ )
65
+
66
+ assert ok is True
67
+ assert "private" not in captured_kwargs
68
+ assert "Visibility: private" in out
69
+
70
+
71
+ def test_ensure_sandbox_overrides_private_argument(monkeypatch):
72
+ captured_kwargs = {}
73
+
74
+ class FakeApi:
75
+ def __init__(self, token=None):
76
+ self.token = token
77
+
78
+ def whoami(self):
79
+ return {"name": "alice"}
80
+
81
+ class FakeSession:
82
+ def __init__(self):
83
+ self.hf_token = "hf-token"
84
+ self.sandbox = None
85
+ self.event_queue = SimpleNamespace(put_nowait=lambda event: None)
86
+ self._cancelled = asyncio.Event()
87
+
88
+ async def send_event(self, event):
89
+ pass
90
+
91
+ def fake_create(**kwargs):
92
+ captured_kwargs.update(kwargs)
93
+ return SimpleNamespace(
94
+ space_id="alice/sandbox-12345678",
95
+ url="https://huggingface.co/spaces/alice/sandbox-12345678",
96
+ )
97
+
98
+ async def fake_record_sandbox_create(*args, **kwargs):
99
+ pass
100
+
101
+ monkeypatch.setattr(sandbox_tool, "HfApi", FakeApi)
102
+ monkeypatch.setattr(sandbox_tool, "_cleanup_user_orphan_sandboxes", lambda *args: 0)
103
+ monkeypatch.setattr(Sandbox, "create", staticmethod(fake_create))
104
+ monkeypatch.setattr(telemetry, "record_sandbox_create", fake_record_sandbox_create)
105
+ monkeypatch.setattr("huggingface_hub.metadata_update", lambda *args, **kwargs: None)
106
+
107
+ async def run():
108
+ session = FakeSession()
109
+ sb, error = await sandbox_tool._ensure_sandbox(session, private=False)
110
+ return sb, error
111
+
112
+ sb, error = asyncio.run(run())
113
+
114
+ assert error is None
115
+ assert sb is not None
116
+ assert captured_kwargs["private"] is True