lewtun HF Staff OpenAI Codex commited on
Commit
1e1fe1f
·
unverified ·
1 Parent(s): b4ae9d9

Skip sandbox spaces for artifact metadata (#241)

Browse files

* Skip sandbox spaces for artifact metadata

Co-authored-by: OpenAI Codex <codex@openai.com>

* Address sandbox metadata PR feedback

Co-authored-by: OpenAI Codex <codex@openai.com>

* Avoid sandbox title metadata updates

Co-authored-by: OpenAI Codex <codex@openai.com>

---------

Co-authored-by: OpenAI Codex <codex@openai.com>

agent/core/hub_artifacts.py CHANGED
@@ -79,6 +79,20 @@ def _artifact_key(repo_id: str, repo_type: str | None) -> str:
79
  return f"{repo_type or 'model'}:{repo_id}"
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def _session_artifact_set(session: Any, attr: str) -> set[str]:
83
  current = getattr(session, attr, None)
84
  if isinstance(current, set):
@@ -397,6 +411,8 @@ def register_hub_artifact(
397
  repo_type = repo_type or "model"
398
  if repo_type not in SUPPORTED_REPO_TYPES:
399
  return False
 
 
400
 
401
  key = _artifact_key(repo_id, repo_type)
402
  remember_hub_artifact(session, repo_id, repo_type)
@@ -465,6 +481,7 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
465
  tag = {ML_INTERN_TAG!r}
466
  marker = {PROVENANCE_MARKER!r}
467
  supported = {sorted(SUPPORTED_REPO_TYPES)!r}
 
468
  registering = False
469
  collection_slug = {collection_slug!r}
470
  registered = set()
@@ -611,6 +628,8 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
611
  repo_type = repo_type or "model"
612
  if repo_type not in supported:
613
  return
 
 
614
  key = f"{{repo_type}}:{{repo_id}}"
615
  if key in registered and not force:
616
  return
@@ -666,6 +685,12 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
666
  def _repo_type(kwargs):
667
  return kwargs.get("repo_type") or "model"
668
 
 
 
 
 
 
 
669
  def _patched_create_repo(self, *args, **kwargs):
670
  result = _original_create_repo(self, *args, **kwargs)
671
  repo_id = _repo_id(args, kwargs)
 
79
  return f"{repo_type or 'model'}:{repo_id}"
80
 
81
 
82
+ def _sandbox_space_name_pattern() -> str:
83
+ from agent.tools.sandbox_tool import SANDBOX_SPACE_NAME_RE
84
+
85
+ return SANDBOX_SPACE_NAME_RE.pattern
86
+
87
+
88
+ def is_sandbox_hub_repo(repo_id: str | None, repo_type: str | None) -> bool:
89
+ """Return True for ML Intern's ephemeral sandbox Space repos."""
90
+ if (repo_type or "model") != "space" or not repo_id:
91
+ return False
92
+ repo_name = str(repo_id).rsplit("/", 1)[-1]
93
+ return bool(re.fullmatch(_sandbox_space_name_pattern(), repo_name))
94
+
95
+
96
  def _session_artifact_set(session: Any, attr: str) -> set[str]:
97
  current = getattr(session, attr, None)
98
  if isinstance(current, set):
 
411
  repo_type = repo_type or "model"
412
  if repo_type not in SUPPORTED_REPO_TYPES:
413
  return False
414
+ if is_sandbox_hub_repo(repo_id, repo_type):
415
+ return False
416
 
417
  key = _artifact_key(repo_id, repo_type)
418
  remember_hub_artifact(session, repo_id, repo_type)
 
481
  tag = {ML_INTERN_TAG!r}
482
  marker = {PROVENANCE_MARKER!r}
483
  supported = {sorted(SUPPORTED_REPO_TYPES)!r}
484
+ sandbox_space_re = re.compile({_sandbox_space_name_pattern()!r})
485
  registering = False
486
  collection_slug = {collection_slug!r}
487
  registered = set()
 
628
  repo_type = repo_type or "model"
629
  if repo_type not in supported:
630
  return
631
+ if _is_sandbox_repo(repo_id, repo_type):
632
+ return
633
  key = f"{{repo_type}}:{{repo_id}}"
634
  if key in registered and not force:
635
  return
 
685
  def _repo_type(kwargs):
686
  return kwargs.get("repo_type") or "model"
687
 
688
+ def _is_sandbox_repo(repo_id, repo_type):
689
+ if (repo_type or "model") != "space" or not repo_id:
690
+ return False
691
+ repo_name = str(repo_id).rsplit("/", 1)[-1]
692
+ return bool(sandbox_space_re.fullmatch(repo_name))
693
+
694
  def _patched_create_repo(self, *args, **kwargs):
695
  result = _original_create_repo(self, *args, **kwargs)
696
  repo_id = _repo_id(args, kwargs)
agent/tools/sandbox_tool.py CHANGED
@@ -33,7 +33,7 @@ DEFAULT_CPU_SANDBOX_HARDWARE = "cpu-basic"
33
  # Match the exact suffix pattern Sandbox.create produces: "sandbox-<8 hex>".
34
  # Used to identify orphan sandboxes from prior sessions safely (won't match
35
  # user-renamed lookalikes).
36
- _SANDBOX_NAME_RE = re.compile(r"^sandbox-[a-f0-9]{8}$")
37
 
38
  # How stale a sandbox must be before we treat it as definitely orphan.
39
  # Anything more recent could be tied to a still-live session in another tab,
@@ -195,7 +195,7 @@ def _cleanup_user_orphan_sandboxes(
195
 
196
  for space in spaces:
197
  space_name = space.id.rsplit("/", 1)[-1]
198
- if not _SANDBOX_NAME_RE.match(space_name):
199
  continue
200
 
201
  last_mod = getattr(space, "lastModified", None) or getattr(
@@ -374,18 +374,6 @@ async def _create_sandbox_locked(
374
  create_latency_s=int(_t.monotonic() - _t_start),
375
  )
376
 
377
- # Set a descriptive title (template title is inherited on duplicate)
378
- from huggingface_hub import metadata_update
379
-
380
- await asyncio.to_thread(
381
- metadata_update,
382
- sb.space_id,
383
- {"title": "ml-intern sandbox"},
384
- repo_type="space",
385
- overwrite=True,
386
- token=token,
387
- )
388
-
389
  await session.send_event(
390
  Event(
391
  event_type="tool_log",
 
33
  # Match the exact suffix pattern Sandbox.create produces: "sandbox-<8 hex>".
34
  # Used to identify orphan sandboxes from prior sessions safely (won't match
35
  # user-renamed lookalikes).
36
+ SANDBOX_SPACE_NAME_RE = re.compile(r"^sandbox-[a-f0-9]{8}$")
37
 
38
  # How stale a sandbox must be before we treat it as definitely orphan.
39
  # Anything more recent could be tied to a still-live session in another tab,
 
195
 
196
  for space in spaces:
197
  space_name = space.id.rsplit("/", 1)[-1]
198
+ if not SANDBOX_SPACE_NAME_RE.match(space_name):
199
  continue
200
 
201
  last_mod = getattr(space, "lastModified", None) or getattr(
 
374
  create_latency_s=int(_t.monotonic() - _t_start),
375
  )
376
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  await session.send_event(
378
  Event(
379
  event_type="tool_log",
tests/unit/test_hub_artifacts.py CHANGED
@@ -13,6 +13,7 @@ from agent.core.hub_artifacts import (
13
  build_hub_artifact_sitecustomize,
14
  ensure_session_artifact_collection,
15
  is_known_hub_artifact,
 
16
  register_hub_artifact,
17
  remember_hub_artifact,
18
  start_session_artifact_collection_task,
@@ -162,6 +163,35 @@ def test_register_hub_artifact_creates_private_collection_and_adds_item_once(
162
  assert b"ml-intern" in api.uploads[0]["path_or_fileobj"]
163
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  def test_register_hub_artifact_retries_after_partial_failure(monkeypatch):
166
  session = _session()
167
  api = SimpleNamespace(token="hf-token")
@@ -503,3 +533,73 @@ def test_sitecustomize_bootstrap_reuses_existing_collection_slug():
503
  assert (
504
  "collection_slug = 'alice/ml-intern-artifacts-2026-05-05-session-123'" in code
505
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  build_hub_artifact_sitecustomize,
14
  ensure_session_artifact_collection,
15
  is_known_hub_artifact,
16
+ is_sandbox_hub_repo,
17
  register_hub_artifact,
18
  remember_hub_artifact,
19
  start_session_artifact_collection_task,
 
163
  assert b"ml-intern" in api.uploads[0]["path_or_fileobj"]
164
 
165
 
166
+ def test_register_hub_artifact_skips_sandbox_spaces(monkeypatch):
167
+ session = _session()
168
+ api = SimpleNamespace(token="hf-token")
169
+ calls = []
170
+
171
+ monkeypatch.setattr(
172
+ hub_artifacts,
173
+ "_update_repo_card",
174
+ lambda *args, **kwargs: calls.append(("card", args, kwargs)),
175
+ )
176
+ monkeypatch.setattr(
177
+ hub_artifacts,
178
+ "_add_to_collection",
179
+ lambda *args, **kwargs: calls.append(("collection", args, kwargs)),
180
+ )
181
+
182
+ assert is_sandbox_hub_repo("alice/sandbox-1234abcd", "space")
183
+ assert not is_sandbox_hub_repo("alice/sandbox-1234abcd", "model")
184
+ assert not is_sandbox_hub_repo("alice/demo-space", "space")
185
+ assert not register_hub_artifact(
186
+ api,
187
+ "alice/sandbox-1234abcd",
188
+ "space",
189
+ session=session,
190
+ )
191
+ assert not is_known_hub_artifact(session, "alice/sandbox-1234abcd", "space")
192
+ assert calls == []
193
+
194
+
195
  def test_register_hub_artifact_retries_after_partial_failure(monkeypatch):
196
  session = _session()
197
  api = SimpleNamespace(token="hf-token")
 
533
  assert (
534
  "collection_slug = 'alice/ml-intern-artifacts-2026-05-05-session-123'" in code
535
  )
536
+
537
+
538
+ def test_sitecustomize_skips_sandbox_space_registration(monkeypatch):
539
+ import huggingface_hub as hub
540
+ from huggingface_hub import HfApi
541
+
542
+ uploads = []
543
+ downloads = []
544
+ collection_creates = []
545
+ collection_items = []
546
+
547
+ for name in ("create_repo", "upload_folder", "create_commit"):
548
+ if hasattr(HfApi, name):
549
+ monkeypatch.setattr(HfApi, name, getattr(HfApi, name))
550
+ if hasattr(hub, name):
551
+ monkeypatch.setattr(hub, name, getattr(hub, name))
552
+
553
+ def fake_upload_file(self, **kwargs):
554
+ uploads.append(kwargs)
555
+ return SimpleNamespace()
556
+
557
+ def fake_hf_hub_download(*args, **kwargs):
558
+ downloads.append((args, kwargs))
559
+ raise RuntimeError("sandbox metadata update should be skipped")
560
+
561
+ def fake_create_collection(self, **kwargs):
562
+ collection_creates.append(kwargs)
563
+ return SimpleNamespace(slug="alice/ml-intern-artifacts")
564
+
565
+ def fake_add_collection_item(self, **kwargs):
566
+ collection_items.append(kwargs)
567
+
568
+ monkeypatch.setattr(HfApi, "upload_file", fake_upload_file)
569
+ monkeypatch.setattr(HfApi, "create_collection", fake_create_collection)
570
+ monkeypatch.setattr(HfApi, "add_collection_item", fake_add_collection_item)
571
+ monkeypatch.setattr(hub, "upload_file", getattr(hub, "upload_file"))
572
+ monkeypatch.setattr(hub, "hf_hub_download", fake_hf_hub_download)
573
+
574
+ exec(build_hub_artifact_sitecustomize(_session()), {})
575
+ assert HfApi.upload_file is not fake_upload_file
576
+
577
+ HfApi(token="hf-token").upload_file(
578
+ path_or_fileobj=b"app",
579
+ path_in_repo="app.py",
580
+ repo_id="alice/normal-space",
581
+ repo_type="space",
582
+ token="hf-token",
583
+ )
584
+
585
+ assert downloads[0][1]["repo_id"] == "alice/normal-space"
586
+ assert len(collection_creates) == 1
587
+ assert collection_items[0]["item_id"] == "alice/normal-space"
588
+
589
+ uploads.clear()
590
+ downloads.clear()
591
+ collection_creates.clear()
592
+ collection_items.clear()
593
+
594
+ HfApi(token="hf-token").upload_file(
595
+ path_or_fileobj=b"app",
596
+ path_in_repo="app.py",
597
+ repo_id="alice/sandbox-1234abcd",
598
+ repo_type="space",
599
+ token="hf-token",
600
+ )
601
+
602
+ assert [upload["repo_id"] for upload in uploads] == ["alice/sandbox-1234abcd"]
603
+ assert downloads == []
604
+ assert collection_creates == []
605
+ assert collection_items == []
tests/unit/test_sandbox_private_spaces.py CHANGED
@@ -11,6 +11,10 @@ from agent.tools.sandbox_client import Sandbox
11
  from agent.tools.sandbox_tool import sandbox_create_handler
12
 
13
 
 
 
 
 
14
  def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
15
  duplicate_kwargs = {}
16
  requested_hardware = []
@@ -295,7 +299,7 @@ def test_ensure_sandbox_overrides_private_argument(monkeypatch):
295
  monkeypatch.setattr(sandbox_tool, "_cleanup_user_orphan_sandboxes", lambda *args: 0)
296
  monkeypatch.setattr(Sandbox, "create", staticmethod(fake_create))
297
  monkeypatch.setattr(telemetry, "record_sandbox_create", fake_record_sandbox_create)
298
- monkeypatch.setattr("huggingface_hub.metadata_update", lambda *args, **kwargs: None)
299
 
300
  async def run():
301
  session = FakeSession()
@@ -356,7 +360,7 @@ def test_sandbox_creation_is_serialized_per_owner(monkeypatch):
356
  monkeypatch.setattr(sandbox_tool, "_cleanup_user_orphan_sandboxes", lambda *args: 0)
357
  monkeypatch.setattr(Sandbox, "create", staticmethod(fake_create))
358
  monkeypatch.setattr(telemetry, "record_sandbox_create", fake_record_sandbox_create)
359
- monkeypatch.setattr("huggingface_hub.metadata_update", lambda *args, **kwargs: None)
360
 
361
  async def run():
362
  await asyncio.gather(
 
11
  from agent.tools.sandbox_tool import sandbox_create_handler
12
 
13
 
14
+ def _fail_metadata_update(*args, **kwargs):
15
+ raise AssertionError("sandbox creation should not update Space metadata")
16
+
17
+
18
  def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
19
  duplicate_kwargs = {}
20
  requested_hardware = []
 
299
  monkeypatch.setattr(sandbox_tool, "_cleanup_user_orphan_sandboxes", lambda *args: 0)
300
  monkeypatch.setattr(Sandbox, "create", staticmethod(fake_create))
301
  monkeypatch.setattr(telemetry, "record_sandbox_create", fake_record_sandbox_create)
302
+ monkeypatch.setattr("huggingface_hub.metadata_update", _fail_metadata_update)
303
 
304
  async def run():
305
  session = FakeSession()
 
360
  monkeypatch.setattr(sandbox_tool, "_cleanup_user_orphan_sandboxes", lambda *args: 0)
361
  monkeypatch.setattr(Sandbox, "create", staticmethod(fake_create))
362
  monkeypatch.setattr(telemetry, "record_sandbox_create", fake_record_sandbox_create)
363
+ monkeypatch.setattr("huggingface_hub.metadata_update", _fail_metadata_update)
364
 
365
  async def run():
366
  await asyncio.gather(