Aksel Joonas Reedi commited on
Commit
98e4465
·
unverified ·
1 Parent(s): d408a51

Protect sandbox control-plane routes (#142)

Browse files

* Protect sandbox control-plane routes

The sandbox server exposed command and file APIs on public Spaces without validating the client. This adds bearer-token validation to every non-health endpoint and provisions a per-sandbox API secret so control-plane access is not coupled to URL knowledge.

Constraint: Existing sandboxes may only have HF_TOKEN configured, so the server accepts SANDBOX_API_TOKEN first and falls back to HF_TOKEN for compatibility.

Rejected: Make every sandbox private by default | that changes product behavior and still leaves the embedded API without a server-side auth boundary.

Confidence: high

Scope-risk: narrow

Directive: Do not add new sandbox API routes without the auth dependency unless the route is intentionally public health/status only.

Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_sandbox_api_auth.py

* Preserve sandbox reconnect with API token

Review caught that Sandbox.connect would send only the HF token while newly-created sandboxes now expect SANDBOX_API_TOKEN. Add an api_token parameter, hide the field from repr, and cover the documented HF_TOKEN fallback for legacy sandboxes.

Constraint: Existing connect callers must keep working when they only pass an HF token to legacy sandboxes.

Confidence: high

Scope-risk: narrow

Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_sandbox_api_auth.py

* Add live sandbox communication test

Add an opt-in integration test that provisions a real public cpu-basic sandbox, verifies unauthenticated API calls are rejected, exercises authenticated bash/write/exists/read communication, verifies reconnect with the sandbox API token, and deletes the Space afterward.

Constraint: Live sandbox creation requires HF_TOKEN and should not run in default CI because it creates a real Space.

Rejected: Use a private Space for this test | private hf.space requests return 404 before the sandbox FastAPI auth layer is reachable, so it does not validate the public-sandbox threat model fixed here.

Confidence: high

Scope-risk: narrow

Tested: ML_INTERN_LIVE_SANDBOX_TESTS=1 ML_INTERN_LIVE_ENV_FILE=/Users/akseljoonas/Documents/ml-intern/.env UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/integration/test_live_sandbox_auth.py -q -s

Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_sandbox_api_auth.py tests/integration/test_live_sandbox_auth.py -q

agent/tools/sandbox_client.py CHANGED
@@ -37,6 +37,7 @@ Tools: bash, read, write, edit, upload
37
  from __future__ import annotations
38
 
39
  import io
 
40
  import sys
41
  import time
42
  import uuid
@@ -99,8 +100,8 @@ CMD ["python", "sandbox_server.py"]
99
 
100
  _SANDBOX_SERVER = '''\
101
  """Minimal FastAPI server for sandbox operations."""
102
- import os, subprocess, pathlib, signal, threading, re, tempfile
103
- from fastapi import FastAPI
104
  from pydantic import BaseModel
105
  from typing import Optional
106
  import uvicorn
@@ -156,6 +157,22 @@ def _atomic_write(path: pathlib.Path, content: str):
156
 
157
  app = FastAPI()
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # Track active bash processes so they can be killed on cancel
160
  _active_procs = {} # pid -> subprocess.Popen
161
  _proc_lock = threading.Lock()
@@ -344,7 +361,7 @@ def _validate_python(content, path=""):
344
  def health():
345
  return {"status": "ok"}
346
 
347
- @app.post("/api/bash")
348
  def bash(req: BashReq):
349
  try:
350
  proc = subprocess.Popen(
@@ -371,7 +388,7 @@ def bash(req: BashReq):
371
  except Exception as e:
372
  return {"success": False, "output": "", "error": str(e)}
373
 
374
- @app.post("/api/kill")
375
  def kill_all():
376
  """Kill all active bash processes. Called when user cancels."""
377
  with _proc_lock:
@@ -389,7 +406,7 @@ def kill_all():
389
  pass
390
  return {"success": True, "output": f"Killed {len(killed)} process(es): {killed}", "error": ""}
391
 
392
- @app.post("/api/read")
393
  def read(req: ReadReq):
394
  try:
395
  p = pathlib.Path(req.path)
@@ -406,7 +423,7 @@ def read(req: ReadReq):
406
  except Exception as e:
407
  return {"success": False, "output": "", "error": str(e)}
408
 
409
- @app.post("/api/write")
410
  def write(req: WriteReq):
411
  try:
412
  p = pathlib.Path(req.path)
@@ -420,7 +437,7 @@ def write(req: WriteReq):
420
  except Exception as e:
421
  return {"success": False, "output": "", "error": str(e)}
422
 
423
- @app.post("/api/edit")
424
  def edit(req: EditReq):
425
  try:
426
  p = pathlib.Path(req.path)
@@ -447,7 +464,7 @@ def edit(req: EditReq):
447
  except Exception as e:
448
  return {"success": False, "output": "", "error": str(e)}
449
 
450
- @app.post("/api/exists")
451
  def exists(req: ExistsReq):
452
  return {"success": True, "output": str(pathlib.Path(req.path).exists()).lower(), "error": ""}
453
 
@@ -482,6 +499,7 @@ class Sandbox:
482
 
483
  space_id: str
484
  token: str | None = None
 
485
  work_dir: str = "/app"
486
  timeout: int = DEFAULT_TIMEOUT
487
  _owns_space: bool = field(default=False, repr=False)
@@ -495,9 +513,10 @@ class Sandbox:
495
  # Trailing slash is critical: httpx resolves relative paths against base_url.
496
  # Without it, client.get("health") resolves to /health instead of /api/health.
497
  self._base_url = f"https://{slug}.hf.space/api/"
 
498
  self._client = httpx.Client(
499
  base_url=self._base_url,
500
- headers={"Authorization": f"Bearer {self.token}"} if self.token else {},
501
  timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
502
  follow_redirects=True,
503
  )
@@ -563,6 +582,7 @@ class Sandbox:
563
  base = name or "sandbox"
564
  suffix = uuid.uuid4().hex[:8]
565
  space_id = f"{owner}/{base}-{suffix}"
 
566
 
567
  _log(f"Creating sandbox: {space_id} (from {template})...")
568
 
@@ -583,8 +603,9 @@ class Sandbox:
583
  # Inject secrets BEFORE uploading server files (which triggers rebuild).
584
  # Secrets added after a Space is running aren't available until restart,
585
  # so they must be set before the build/start cycle.
586
- if secrets:
587
- for key, val in secrets.items():
 
588
  api.add_space_secret(space_id, key, val)
589
 
590
  # Upload sandbox server and Dockerfile (triggers rebuild)
@@ -617,7 +638,12 @@ class Sandbox:
617
  _check_cancel()
618
 
619
  # Wait for the API server to be responsive (non-fatal)
620
- sb = cls(space_id=space_id, token=token, _owns_space=True)
 
 
 
 
 
621
  try:
622
  sb._wait_for_api(timeout=API_WAIT_TIMEOUT, log=_log)
623
  except TimeoutError as e:
@@ -648,13 +674,24 @@ class Sandbox:
648
  log("Server files uploaded, rebuild triggered.")
649
 
650
  @classmethod
651
- def connect(cls, space_id: str, *, token: str | None = None) -> Sandbox:
 
 
 
 
 
 
652
  """
653
  Connect to an existing running Space.
654
 
655
  Does a health check to verify the Space is reachable.
656
  """
657
- sb = cls(space_id=space_id, token=token, _owns_space=False)
 
 
 
 
 
658
  sb._wait_for_api(timeout=60)
659
  return sb
660
 
 
37
  from __future__ import annotations
38
 
39
  import io
40
+ import secrets as secrets_lib
41
  import sys
42
  import time
43
  import uuid
 
100
 
101
  _SANDBOX_SERVER = '''\
102
  """Minimal FastAPI server for sandbox operations."""
103
+ import hmac, os, subprocess, pathlib, signal, threading, re, tempfile
104
+ from fastapi import Depends, FastAPI, HTTPException, Request
105
  from pydantic import BaseModel
106
  from typing import Optional
107
  import uvicorn
 
157
 
158
  app = FastAPI()
159
 
160
+ def _expected_api_token() -> str:
161
+ return os.environ.get("SANDBOX_API_TOKEN") or os.environ.get("HF_TOKEN") or ""
162
+
163
+ def _require_auth(request: Request) -> None:
164
+ expected = _expected_api_token()
165
+ if not expected:
166
+ raise HTTPException(status_code=503, detail="Sandbox API token not configured")
167
+ auth_header = request.headers.get("authorization", "")
168
+ scheme, _, supplied = auth_header.partition(" ")
169
+ if scheme.lower() != "bearer" or not supplied:
170
+ raise HTTPException(status_code=401, detail="Missing bearer token")
171
+ if not hmac.compare_digest(supplied, expected):
172
+ raise HTTPException(status_code=401, detail="Invalid bearer token")
173
+
174
+ _AUTH = [Depends(_require_auth)]
175
+
176
  # Track active bash processes so they can be killed on cancel
177
  _active_procs = {} # pid -> subprocess.Popen
178
  _proc_lock = threading.Lock()
 
361
  def health():
362
  return {"status": "ok"}
363
 
364
+ @app.post("/api/bash", dependencies=_AUTH)
365
  def bash(req: BashReq):
366
  try:
367
  proc = subprocess.Popen(
 
388
  except Exception as e:
389
  return {"success": False, "output": "", "error": str(e)}
390
 
391
+ @app.post("/api/kill", dependencies=_AUTH)
392
  def kill_all():
393
  """Kill all active bash processes. Called when user cancels."""
394
  with _proc_lock:
 
406
  pass
407
  return {"success": True, "output": f"Killed {len(killed)} process(es): {killed}", "error": ""}
408
 
409
+ @app.post("/api/read", dependencies=_AUTH)
410
  def read(req: ReadReq):
411
  try:
412
  p = pathlib.Path(req.path)
 
423
  except Exception as e:
424
  return {"success": False, "output": "", "error": str(e)}
425
 
426
+ @app.post("/api/write", dependencies=_AUTH)
427
  def write(req: WriteReq):
428
  try:
429
  p = pathlib.Path(req.path)
 
437
  except Exception as e:
438
  return {"success": False, "output": "", "error": str(e)}
439
 
440
+ @app.post("/api/edit", dependencies=_AUTH)
441
  def edit(req: EditReq):
442
  try:
443
  p = pathlib.Path(req.path)
 
464
  except Exception as e:
465
  return {"success": False, "output": "", "error": str(e)}
466
 
467
+ @app.post("/api/exists", dependencies=_AUTH)
468
  def exists(req: ExistsReq):
469
  return {"success": True, "output": str(pathlib.Path(req.path).exists()).lower(), "error": ""}
470
 
 
499
 
500
  space_id: str
501
  token: str | None = None
502
+ api_token: str | None = field(default=None, repr=False)
503
  work_dir: str = "/app"
504
  timeout: int = DEFAULT_TIMEOUT
505
  _owns_space: bool = field(default=False, repr=False)
 
513
  # Trailing slash is critical: httpx resolves relative paths against base_url.
514
  # Without it, client.get("health") resolves to /health instead of /api/health.
515
  self._base_url = f"https://{slug}.hf.space/api/"
516
+ api_token = self.api_token or self.token
517
  self._client = httpx.Client(
518
  base_url=self._base_url,
519
+ headers={"Authorization": f"Bearer {api_token}"} if api_token else {},
520
  timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
521
  follow_redirects=True,
522
  )
 
582
  base = name or "sandbox"
583
  suffix = uuid.uuid4().hex[:8]
584
  space_id = f"{owner}/{base}-{suffix}"
585
+ sandbox_api_token = secrets_lib.token_urlsafe(32)
586
 
587
  _log(f"Creating sandbox: {space_id} (from {template})...")
588
 
 
603
  # Inject secrets BEFORE uploading server files (which triggers rebuild).
604
  # Secrets added after a Space is running aren't available until restart,
605
  # so they must be set before the build/start cycle.
606
+ sandbox_secrets = {**(secrets or {}), "SANDBOX_API_TOKEN": sandbox_api_token}
607
+ if sandbox_secrets:
608
+ for key, val in sandbox_secrets.items():
609
  api.add_space_secret(space_id, key, val)
610
 
611
  # Upload sandbox server and Dockerfile (triggers rebuild)
 
638
  _check_cancel()
639
 
640
  # Wait for the API server to be responsive (non-fatal)
641
+ sb = cls(
642
+ space_id=space_id,
643
+ token=token,
644
+ api_token=sandbox_api_token,
645
+ _owns_space=True,
646
+ )
647
  try:
648
  sb._wait_for_api(timeout=API_WAIT_TIMEOUT, log=_log)
649
  except TimeoutError as e:
 
674
  log("Server files uploaded, rebuild triggered.")
675
 
676
  @classmethod
677
+ def connect(
678
+ cls,
679
+ space_id: str,
680
+ *,
681
+ token: str | None = None,
682
+ api_token: str | None = None,
683
+ ) -> Sandbox:
684
  """
685
  Connect to an existing running Space.
686
 
687
  Does a health check to verify the Space is reachable.
688
  """
689
+ sb = cls(
690
+ space_id=space_id,
691
+ token=token,
692
+ api_token=api_token,
693
+ _owns_space=False,
694
+ )
695
  sb._wait_for_api(timeout=60)
696
  return sb
697
 
tests/integration/test_live_sandbox_auth.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Opt-in live sandbox communication test.
2
+
3
+ This test creates a real Hugging Face Space sandbox, verifies that unauthenticated
4
+ requests are rejected, then exercises the authenticated agent client end-to-end.
5
+ It is skipped unless ``ML_INTERN_LIVE_SANDBOX_TESTS=1`` and ``HF_TOKEN`` are set.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from pathlib import Path
12
+
13
+ import httpx
14
+ import pytest
15
+ from dotenv import load_dotenv
16
+ from huggingface_hub import HfApi
17
+
18
+ from agent.tools.sandbox_client import Sandbox
19
+
20
+
21
+ if env_file := os.environ.get("ML_INTERN_LIVE_ENV_FILE"):
22
+ load_dotenv(Path(env_file))
23
+
24
+
25
+ def _skip_without_live_sandbox() -> None:
26
+ if os.environ.get("ML_INTERN_LIVE_SANDBOX_TESTS") != "1":
27
+ pytest.skip("set ML_INTERN_LIVE_SANDBOX_TESTS=1 to create a real sandbox")
28
+ if not os.environ.get("HF_TOKEN"):
29
+ pytest.skip("set HF_TOKEN to create a real sandbox")
30
+
31
+
32
+ def test_live_sandbox_authenticated_agent_communication():
33
+ _skip_without_live_sandbox()
34
+
35
+ token = os.environ["HF_TOKEN"]
36
+ owner = HfApi(token=token).whoami()["name"]
37
+ sandbox = None
38
+
39
+ try:
40
+ sandbox = Sandbox.create(
41
+ owner=owner,
42
+ name="ml-intern-live-auth",
43
+ hardware="cpu-basic",
44
+ private=False,
45
+ token=token,
46
+ secrets={"HF_TOKEN": token},
47
+ wait_timeout=900,
48
+ )
49
+
50
+ unauthenticated = httpx.Client(
51
+ base_url=sandbox._base_url,
52
+ timeout=30,
53
+ follow_redirects=True,
54
+ )
55
+ try:
56
+ denied = unauthenticated.post("exists", json={"path": "/tmp"})
57
+ assert denied.status_code == 401
58
+ finally:
59
+ unauthenticated.close()
60
+
61
+ bash = sandbox.bash("printf sandbox-live-ok", timeout=30)
62
+ assert bash.success, bash.error
63
+ assert "sandbox-live-ok" in bash.output
64
+
65
+ write = sandbox.write("/tmp/ml_intern_live_auth.txt", "alpha\nbeta\n")
66
+ assert write.success, write.error
67
+
68
+ exists = sandbox._call("exists", {"path": "/tmp/ml_intern_live_auth.txt"})
69
+ assert exists.success, exists.error
70
+ assert exists.output == "true"
71
+
72
+ read = sandbox.read("/tmp/ml_intern_live_auth.txt")
73
+ assert read.success, read.error
74
+ assert "alpha" in read.output
75
+ assert "beta" in read.output
76
+
77
+ reattached = Sandbox.connect(
78
+ sandbox.space_id,
79
+ token=token,
80
+ api_token=sandbox.api_token,
81
+ )
82
+ try:
83
+ reread = reattached.read("/tmp/ml_intern_live_auth.txt")
84
+ assert reread.success, reread.error
85
+ assert "alpha" in reread.output
86
+ finally:
87
+ reattached._client.close()
88
+ finally:
89
+ if sandbox is not None:
90
+ sandbox.delete()
tests/unit/test_sandbox_api_auth.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.testclient import TestClient
2
+
3
+ from agent.tools.sandbox_client import _SANDBOX_SERVER, Sandbox
4
+
5
+
6
+ def _sandbox_app(
7
+ monkeypatch,
8
+ token: str | None = "sandbox-secret",
9
+ *,
10
+ hf_token: str | None = None,
11
+ ):
12
+ monkeypatch.delenv("SANDBOX_API_TOKEN", raising=False)
13
+ monkeypatch.delenv("HF_TOKEN", raising=False)
14
+ if token is not None:
15
+ monkeypatch.setenv("SANDBOX_API_TOKEN", token)
16
+ if hf_token is not None:
17
+ monkeypatch.setenv("HF_TOKEN", hf_token)
18
+ namespace = {}
19
+ exec(_SANDBOX_SERVER, namespace)
20
+ return namespace["app"]
21
+
22
+
23
+ def test_health_is_public(monkeypatch):
24
+ client = TestClient(_sandbox_app(monkeypatch))
25
+
26
+ response = client.get("/api/health")
27
+
28
+ assert response.status_code == 200
29
+ assert response.json() == {"status": "ok"}
30
+
31
+
32
+ def test_file_and_command_routes_require_bearer_token(monkeypatch):
33
+ client = TestClient(_sandbox_app(monkeypatch, "sandbox-secret"))
34
+
35
+ response = client.post("/api/exists", json={"path": "/tmp"})
36
+
37
+ assert response.status_code == 401
38
+
39
+
40
+ def test_file_and_command_routes_accept_valid_bearer_token(monkeypatch):
41
+ client = TestClient(_sandbox_app(monkeypatch, "sandbox-secret"))
42
+
43
+ response = client.post(
44
+ "/api/exists",
45
+ json={"path": "/tmp"},
46
+ headers={"Authorization": "Bearer sandbox-secret"},
47
+ )
48
+
49
+ assert response.status_code == 200
50
+ assert response.json()["success"] is True
51
+
52
+
53
+ def test_legacy_hf_token_fallback_is_accepted(monkeypatch):
54
+ client = TestClient(_sandbox_app(monkeypatch, token=None, hf_token="hf-secret"))
55
+
56
+ response = client.post(
57
+ "/api/exists",
58
+ json={"path": "/tmp"},
59
+ headers={"Authorization": "Bearer hf-secret"},
60
+ )
61
+
62
+ assert response.status_code == 200
63
+ assert response.json()["success"] is True
64
+
65
+
66
+ def test_protected_routes_fail_closed_without_configured_token(monkeypatch):
67
+ client = TestClient(_sandbox_app(monkeypatch, None))
68
+
69
+ response = client.post(
70
+ "/api/exists",
71
+ json={"path": "/tmp"},
72
+ headers={"Authorization": "Bearer anything"},
73
+ )
74
+
75
+ assert response.status_code == 503
76
+
77
+
78
+ def test_sandbox_prefers_control_plane_token_for_api_headers():
79
+ sandbox = Sandbox("owner/name", token="hf-token", api_token="sandbox-secret")
80
+
81
+ assert sandbox._client.headers["authorization"] == "Bearer sandbox-secret"
82
+
83
+
84
+ def test_sandbox_api_token_is_hidden_from_repr():
85
+ sandbox = Sandbox("owner/name", token="hf-token", api_token="sandbox-secret")
86
+
87
+ assert "sandbox-secret" not in repr(sandbox)