ml-intern

Sleeping

App Files Files Community

lewtun HF Staff OpenAI Codex committed 26 days ago

Commit

55a2373

2 Parent(s): e8252a8 60f7188

Deploy 2026-05-08

Browse files

Co-authored-by: OpenAI Codex <codex@openai.com>

Files changed (12) hide show

agent/core/agent_loop.py +27 -4
agent/core/hub_artifacts.py +40 -72
agent/core/session.py +5 -2
agent/core/session_resume.py +287 -0
agent/main.py +118 -0
agent/tools/sandbox_client.py +15 -71
agent/utils/terminal_display.py +1 -0
backend/session_manager.py +0 -29
tests/unit/test_hub_artifacts.py +74 -60
tests/unit/test_sandbox_private_spaces.py +51 -59
tests/unit/test_session_manager_persistence.py +5 -57
tests/unit/test_session_resume.py +382 -0

agent/core/agent_loop.py CHANGED Viewed

@@ -7,6 +7,7 @@ import json
 import logging
 import time
 from dataclasses import dataclass, field
 from typing import Any
 from litellm import (
@@ -26,10 +27,9 @@ from agent.core.cost_estimation import CostEstimate, estimate_tool_cost
 from agent.messaging.gateway import NotificationGateway
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
-from agent.core.hub_artifacts import start_session_artifact_collection_task
 from agent.core.llm_params import _resolve_llm_params
 from agent.core.prompt_caching import with_prompt_caching
-from agent.core.session import Event, OpType, Session
 from agent.core.tools import ToolRouter
 from agent.tools.jobs_tool import CPU_FLAVORS
 from agent.tools.sandbox_tool import DEFAULT_CPU_SANDBOX_HARDWARE
@@ -1667,6 +1667,20 @@ class Handlers:
             logger.warning("Undo: no user message found to remove")
         await session.send_event(Event(event_type="undo_complete"))
     @staticmethod
     async def exec_approval(session: Session, approvals: list[dict]) -> None:
         """Handle batch job execution approval"""
@@ -1953,6 +1967,16 @@ async def process_submission(session: Session, submission) -> bool:
         await Handlers.undo(session)
         return True
     if op.op_type == OpType.EXEC_APPROVAL:
         approvals = op.data.get("approvals", []) if op.data else []
         await Handlers.exec_approval(session, approvals)
@@ -1999,7 +2023,6 @@ async def submission_loop(
     )
     if session_holder is not None:
         session_holder[0] = session
-    start_session_artifact_collection_task(session, token=hf_token)
     logger.info("Agent loop started")
     # Retry any failed uploads from previous sessions (fire-and-forget).
@@ -2007,7 +2030,7 @@ async def submission_loop(
     # to publish to the user's HF dataset gets a fresh attempt on next run.
     if config and config.save_sessions:
         Session.retry_failed_uploads_detached(
-            directory="session_logs",
             repo_id=config.session_dataset_repo,
             personal_repo_id=session._personal_trace_repo_id(),
         )

 import logging
 import time
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any
 from litellm import (
 from agent.messaging.gateway import NotificationGateway
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
 from agent.core.llm_params import _resolve_llm_params
 from agent.core.prompt_caching import with_prompt_caching
+from agent.core.session import DEFAULT_SESSION_LOG_DIR, Event, OpType, Session
 from agent.core.tools import ToolRouter
 from agent.tools.jobs_tool import CPU_FLAVORS
 from agent.tools.sandbox_tool import DEFAULT_CPU_SANDBOX_HARDWARE
             logger.warning("Undo: no user message found to remove")
         await session.send_event(Event(event_type="undo_complete"))
+    @staticmethod
+    async def resume(session: Session, path: str) -> None:
+        """Reload context from a saved session log into the active session.
+        Sends a ``resume_complete`` event carrying the restore metadata on
+        success, or an ``error`` event with the failure message otherwise.
+        """
+        # Imported inside the handler rather than at module import time.
+        from agent.core.session_resume import restore_session_from_log
+        try:
+            result = restore_session_from_log(session, Path(path))
+        except Exception as e:
+            # Any restore failure is reported to the client as an error event
+            # instead of propagating out of the handler.
+            await session.send_event(
+                Event(event_type="error", data={"error": f"Resume failed: {e}"})
+            )
+            return
+        await session.send_event(Event(event_type="resume_complete", data=result))
     @staticmethod
     async def exec_approval(session: Session, approvals: list[dict]) -> None:
         """Handle batch job execution approval"""
         await Handlers.undo(session)
         return True
+    if op.op_type == OpType.RESUME:
+        path = op.data.get("path") if op.data else None
+        if path:
+            await Handlers.resume(session, path)
+        else:
+            await session.send_event(
+                Event(event_type="error", data={"error": "Resume requires a path"})
+            )
+        return True
     if op.op_type == OpType.EXEC_APPROVAL:
         approvals = op.data.get("approvals", []) if op.data else []
         await Handlers.exec_approval(session, approvals)
     )
     if session_holder is not None:
         session_holder[0] = session
     logger.info("Agent loop started")
     # Retry any failed uploads from previous sessions (fire-and-forget).
     # to publish to the user's HF dataset gets a fresh attempt on next run.
     if config and config.save_sessions:
         Session.retry_failed_uploads_detached(
+            directory=str(DEFAULT_SESSION_LOG_DIR),
             repo_id=config.session_dataset_repo,
             personal_repo_id=session._personal_trace_repo_id(),
         )

agent/core/hub_artifacts.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """Best-effort Hub metadata for artifacts generated by ML Intern sessions."""
-import asyncio
 import base64
 import logging
 import re
@@ -11,7 +10,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any
-from huggingface_hub import HfApi, hf_hub_download
 from huggingface_hub.repocard import metadata_load, metadata_save
 from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
@@ -29,7 +28,6 @@ _UUID_SESSION_ID_RE = re.compile(
 _KNOWN_ARTIFACTS_ATTR = "_ml_intern_known_hub_artifacts"
 _REGISTERED_ARTIFACTS_ATTR = "_ml_intern_registered_hub_artifacts"
 _COLLECTION_SLUG_ATTR = "_ml_intern_artifact_collection_slug"
-_COLLECTION_TASK_ATTR = "_ml_intern_artifact_collection_task"
 _SESSION_ARTIFACT_SET_FALLBACK: dict[tuple[int, str], set[str]] = {}
 _USAGE_HEADING_RE = re.compile(
     r"^#{2,6}\s+(usage|how to use|using this (model|dataset)|use this (model|dataset))\b",
@@ -307,70 +305,6 @@ def _ensure_collection_slug(
     return slug
-async def ensure_session_artifact_collection(
-    session: Any,
-    *,
-    token: str | bool | None = None,
-) -> str | None:
-    """Create/cache the per-session artifact collection without raising."""
-    if session is None or not getattr(session, "session_id", None):
-        return None
-    token_value = token if token is not None else getattr(session, "hf_token", None)
-    if not token_value:
-        return None
-    try:
-        api = HfApi(token=token_value)
-        return await asyncio.to_thread(
-            _ensure_collection_slug,
-            api,
-            session,
-            token=token_value,
-        )
-    except Exception as e:
-        logger.warning(
-            "ML Intern session collection creation failed for %s: %s",
-            _safe_session_id(session),
-            e,
-        )
-        return None
-def start_session_artifact_collection_task(
-    session: Any,
-    *,
-    token: str | bool | None = None,
-) -> asyncio.Task | None:
-    """Schedule best-effort collection creation for a newly started session."""
-    if session is None or not getattr(session, "session_id", None):
-        return None
-    if getattr(session, _COLLECTION_SLUG_ATTR, None):
-        return None
-    token_value = token if token is not None else getattr(session, "hf_token", None)
-    if not token_value:
-        return None
-    existing = getattr(session, _COLLECTION_TASK_ATTR, None)
-    if isinstance(existing, asyncio.Task) and not existing.done():
-        return existing
-    try:
-        loop = asyncio.get_running_loop()
-    except RuntimeError:
-        return None
-    async def _run() -> None:
-        await ensure_session_artifact_collection(session, token=token_value)
-    task = loop.create_task(_run())
-    try:
-        setattr(session, _COLLECTION_TASK_ATTR, task)
-    except Exception:
-        logger.debug("Could not attach ML Intern collection task to session")
-    return task
 def _add_to_collection(
     api: Any,
     session: Any,
@@ -378,10 +312,10 @@ def _add_to_collection(
     repo_type: str,
     *,
     token: str | bool | None = None,
-) -> None:
     slug = _ensure_collection_slug(api, session, token=token)
     if not slug:
-        return
     api.add_collection_item(
         collection_slug=slug,
         item_id=repo_id,
@@ -393,6 +327,7 @@ def _add_to_collection(
         exists_ok=True,
         token=token,
     )
 def register_hub_artifact(
@@ -436,8 +371,13 @@ def register_hub_artifact(
         logger.debug("ML Intern repo-card update failed for %s: %s", repo_id, e)
     try:
-        _add_to_collection(api, session, repo_id, repo_type, token=token_value)
-        collection_updated = True
     except Exception as e:
         logger.debug("ML Intern collection update failed for %s: %s", repo_id, e)
@@ -490,6 +430,13 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                 re.IGNORECASE | re.MULTILINE,
             )
             front_matter_re = re.compile(r"\\A---\\s*\\n.*?\\n---\\s*\\n?", re.DOTALL)
             def _token(value=None, api=None):
                 if isinstance(value, str) and value:
@@ -602,6 +549,15 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                 nonlocal collection_slug
                 if collection_slug:
                     return collection_slug
                 collection = api.create_collection(
                     title=collection_title,
                     description=(
@@ -613,6 +569,13 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                     token=token_value,
                 )
                 collection_slug = getattr(collection, "slug", None)
                 return collection_slug
             def _register(
@@ -637,6 +600,7 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                 try:
                     token_value = _token(token_value)
                     api = HfApi(token=token_value)
                     try:
                         current = _readme(api, repo_id, repo_type, token_value)
                         updated = _augment(
@@ -652,8 +616,10 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                                 token=token_value,
                                 commit_message="Update ML Intern artifact metadata",
                             )
                     except Exception:
                         pass
                     try:
                         slug = _ensure_collection(api, token_value)
                         if slug:
@@ -668,9 +634,11 @@ def build_hub_artifact_sitecustomize(session: Any) -> str:
                                 exists_ok=True,
                                 token=token_value,
                             )
                     except Exception:
                         pass
-                    registered.add(key)
                 finally:
                     registering = False

 """Best-effort Hub metadata for artifacts generated by ML Intern sessions."""
 import base64
 import logging
 import re
 from pathlib import Path
 from typing import Any
+from huggingface_hub import hf_hub_download
 from huggingface_hub.repocard import metadata_load, metadata_save
 from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
 _KNOWN_ARTIFACTS_ATTR = "_ml_intern_known_hub_artifacts"
 _REGISTERED_ARTIFACTS_ATTR = "_ml_intern_registered_hub_artifacts"
 _COLLECTION_SLUG_ATTR = "_ml_intern_artifact_collection_slug"
 _SESSION_ARTIFACT_SET_FALLBACK: dict[tuple[int, str], set[str]] = {}
 _USAGE_HEADING_RE = re.compile(
     r"^#{2,6}\s+(usage|how to use|using this (model|dataset)|use this (model|dataset))\b",
     return slug
 def _add_to_collection(
     api: Any,
     session: Any,
     repo_type: str,
     *,
     token: str | bool | None = None,
+) -> bool:
     slug = _ensure_collection_slug(api, session, token=token)
     if not slug:
+        return False
     api.add_collection_item(
         collection_slug=slug,
         item_id=repo_id,
         exists_ok=True,
         token=token,
     )
+    return True
 def register_hub_artifact(
         logger.debug("ML Intern repo-card update failed for %s: %s", repo_id, e)
     try:
+        collection_updated = _add_to_collection(
+            api,
+            session,
+            repo_id,
+            repo_type,
+            token=token_value,
+        )
     except Exception as e:
         logger.debug("ML Intern collection update failed for %s: %s", repo_id, e)
                 re.IGNORECASE | re.MULTILINE,
             )
             front_matter_re = re.compile(r"\\A---\\s*\\n.*?\\n---\\s*\\n?", re.DOTALL)
+            collection_cache_path = (
+                os.environ.get("ML_INTERN_ARTIFACT_COLLECTION_CACHE")
+                or str(
+                    Path(tempfile.gettempdir())
+                    / f"ml-intern-artifacts-{{session_id}}.collection"
+                )
+            )
             def _token(value=None, api=None):
                 if isinstance(value, str) and value:
                 nonlocal collection_slug
                 if collection_slug:
                     return collection_slug
+                try:
+                    cached_slug = Path(collection_cache_path).read_text(
+                        encoding="utf-8"
+                    ).strip()
+                    if cached_slug:
+                        collection_slug = cached_slug
+                        return collection_slug
+                except Exception:
+                    pass
                 collection = api.create_collection(
                     title=collection_title,
                     description=(
                     token=token_value,
                 )
                 collection_slug = getattr(collection, "slug", None)
+                if collection_slug:
+                    try:
+                        cache_path = Path(collection_cache_path)
+                        cache_path.parent.mkdir(parents=True, exist_ok=True)
+                        cache_path.write_text(collection_slug, encoding="utf-8")
+                    except Exception:
+                        pass
                 return collection_slug
             def _register(
                 try:
                     token_value = _token(token_value)
                     api = HfApi(token=token_value)
+                    card_updated = False
                     try:
                         current = _readme(api, repo_id, repo_type, token_value)
                         updated = _augment(
                                 token=token_value,
                                 commit_message="Update ML Intern artifact metadata",
                             )
+                        card_updated = True
                     except Exception:
                         pass
+                    collection_updated = False
                     try:
                         slug = _ensure_collection(api, token_value)
                         if slug:
                                 exists_ok=True,
                                 token=token_value,
                             )
+                            collection_updated = True
                     except Exception:
                         pass
+                    if card_updated and collection_updated:
+                        registered.add(key)
                 finally:
                     registering = False

agent/core/session.py CHANGED Viewed

@@ -21,6 +21,8 @@ logger = logging.getLogger(__name__)
 _DEFAULT_MAX_TOKENS = 200_000
 _TURN_COMPLETE_NOTIFICATION_CHARS = 39000
 def _get_max_tokens_safe(model_name: str) -> int:
     """Return the max input-context tokens for a model.
@@ -60,6 +62,7 @@ class OpType(Enum):
     INTERRUPT = "interrupt"
     UNDO = "undo"
     COMPACT = "compact"
     SHUTDOWN = "shutdown"
@@ -418,7 +421,7 @@ class Session:
     def save_trajectory_local(
         self,
-        directory: str = "session_logs",
         upload_status: str = "pending",
         dataset_url: Optional[str] = None,
     ) -> Optional[str]:
@@ -613,7 +616,7 @@ class Session:
     @staticmethod
     def retry_failed_uploads_detached(
-        directory: str = "session_logs",
         repo_id: Optional[str] = None,
         *,
         personal_repo_id: Optional[str] = None,

 _DEFAULT_MAX_TOKENS = 200_000
 _TURN_COMPLETE_NOTIFICATION_CHARS = 39000
+DEFAULT_SESSION_LOG_DIR = Path("session_logs")
 def _get_max_tokens_safe(model_name: str) -> int:
     """Return the max input-context tokens for a model.
     INTERRUPT = "interrupt"
     UNDO = "undo"
     COMPACT = "compact"
+    RESUME = "resume"
     SHUTDOWN = "shutdown"
     def save_trajectory_local(
         self,
+        directory: str = str(DEFAULT_SESSION_LOG_DIR),
         upload_status: str = "pending",
         dataset_url: Optional[str] = None,
     ) -> Optional[str]:
     @staticmethod
     def retry_failed_uploads_detached(
+        directory: str = str(DEFAULT_SESSION_LOG_DIR),
         repo_id: Optional[str] = None,
         *,
         personal_repo_id: Optional[str] = None,

agent/core/session_resume.py ADDED Viewed

	@@ -0,0 +1,287 @@

+"""Reload a previously saved session log into the active CLI session."""
+from __future__ import annotations
+import json
+import logging
+import re
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+from litellm import Message
+from agent.core.model_switcher import is_valid_model_id
+from agent.core.session import DEFAULT_SESSION_LOG_DIR
+logger = logging.getLogger(__name__)
+_REDACTED_MARKER = re.compile(r"\[REDACTED_[A-Z_]+\]")
+@dataclass
+class SessionLogEntry:
+    """Metadata for a locally saved session log.
+    One entry is built per ``*.json`` file by ``list_session_logs``.
+    """
+    # Location of the log file on disk.
+    path: Path
+    # ``session_id`` from the log, or the file stem when the log has none.
+    session_id: str
+    # Raw ``session_start_time`` / ``session_end_time`` values from the log;
+    # ``None`` when the key is absent.
+    session_start_time: str | None
+    session_end_time: str | None
+    # Raw ``model_name`` value from the log, if any.
+    model_name: str | None
+    # Length of the log's ``messages`` array.
+    message_count: int
+    # One-line preview of the first user message that has any text.
+    preview: str
+    # File modification time (``st_mtime``); used to sort newest first.
+    mtime: float
+def _message_preview(content: Any, max_chars: int = 72) -> str:
+    """Build a single-line preview of a message body.
+    ``content`` may be a plain string or a list of blocks (strings, or dicts
+    carrying a ``text`` or ``content`` string). Any other shape yields an
+    empty preview. Whitespace runs are collapsed to single spaces, and text
+    longer than ``max_chars`` is cut and suffixed with an ellipsis.
+    """
+    if isinstance(content, str):
+        fragments = [content]
+    elif isinstance(content, list):
+        fragments = []
+        for item in content:
+            if isinstance(item, str):
+                fragments.append(item)
+                continue
+            if not isinstance(item, dict):
+                continue
+            candidate = item.get("text") or item.get("content")
+            if isinstance(candidate, str):
+                fragments.append(candidate)
+    else:
+        fragments = []
+    # Join the pieces, then squash every whitespace run (newlines included).
+    collapsed = " ".join(" ".join(fragments).split())
+    if len(collapsed) <= max_chars:
+        return collapsed
+    return collapsed[: max_chars - 1].rstrip() + "…"
+def _first_user_preview(messages: list[Any]) -> str:
+    """Return the preview of the first user message that has any text.
+    Falls back to a fixed placeholder when no user message yields a
+    non-empty preview.
+    """
+    previews = (
+        _message_preview(entry.get("content"))
+        for entry in messages
+        if isinstance(entry, dict) and entry.get("role") == "user"
+    )
+    # The generator is lazy, so scanning stops at the first non-empty preview.
+    return next((text for text in previews if text), "(no user prompt preview)")
+def list_session_logs(
+    directory: Path = DEFAULT_SESSION_LOG_DIR,
+) -> list[SessionLogEntry]:
+    """Return readable session logs under ``directory``, newest first.
+    Listing is best-effort: a file is skipped (never raised on) when it
+    cannot be read, statted, or parsed as JSON, when its top level is not a
+    JSON object, or when its ``messages`` value is not a list.
+    Args:
+        directory: Folder scanned (non-recursively) for ``*.json`` logs.
+    Returns:
+        One ``SessionLogEntry`` per usable log, sorted by file modification
+        time in descending order. Empty when ``directory`` does not exist.
+    """
+    if not directory.exists():
+        return []
+    entries: list[SessionLogEntry] = []
+    for path in directory.glob("*.json"):
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            # Stat inside the guard: the file may vanish between glob and stat.
+            mtime = path.stat().st_mtime
+        except Exception:
+            continue
+        # A valid JSON document can still be an array or scalar; without this
+        # check ``data.get`` would raise and abort the whole listing.
+        if not isinstance(data, dict):
+            continue
+        messages = data.get("messages") or []
+        if not isinstance(messages, list):
+            continue
+        session_id = data.get("session_id")
+        if not isinstance(session_id, str) or not session_id:
+            # Fall back to the file name when the log carries no usable id.
+            session_id = path.stem
+        entries.append(
+            SessionLogEntry(
+                path=path,
+                session_id=session_id,
+                session_start_time=data.get("session_start_time"),
+                session_end_time=data.get("session_end_time"),
+                model_name=data.get("model_name"),
+                message_count=len(messages),
+                preview=_first_user_preview(messages),
+                mtime=mtime,
+            )
+        )
+    entries.sort(key=lambda item: item.mtime, reverse=True)
+    return entries
+def format_session_log_entry(index: int, entry: SessionLogEntry) -> str:
+    """Render one numbered, two-line listing row for ``entry``.
+    The first line shows the index, a timestamp (end time preferred over
+    start time), the first eight characters of the session id, the message
+    count and the model name; the second line is the indented preview.
+    """
+    when = entry.session_end_time or entry.session_start_time
+    if isinstance(when, str) and when:
+        try:
+            stamp = datetime.fromisoformat(when).strftime("%Y-%m-%d %H:%M")
+        except ValueError:
+            # Not ISO-8601: show the leading characters of the raw value.
+            stamp = when[:16]
+    else:
+        stamp = "unknown time"
+    model_label = entry.model_name or "unknown model"
+    header = (
+        f"{index:>2}. {stamp}  {entry.session_id[:8]}  "
+        f"{entry.message_count} msgs  {model_label}"
+    )
+    return f"{header}\n    {entry.preview}"
+def resolve_session_log_arg(
+    arg: str,
+    entries: list[SessionLogEntry],
+    directory: Path = DEFAULT_SESSION_LOG_DIR,
+) -> Path | None:
+    """Resolve ``/resume <arg>`` as index, path, filename, or session id prefix.
+    Resolution order: a 1-based index into ``entries``; an existing file at
+    the given path, at that path under ``directory``, or at
+    ``directory/<arg>.json``; finally a unique prefix match on a session id
+    or log file name. Returns ``None`` when nothing (or more than one
+    prefix candidate) matches.
+    """
+    token = arg.strip()
+    if not token:
+        return None
+    if token.isdigit():
+        position = int(token)
+        # Out-of-range numbers fall through: they may still be a file name
+        # or an id prefix made of digits.
+        if 0 < position <= len(entries):
+            return entries[position - 1].path
+    literal = Path(token).expanduser()
+    probes = [literal]
+    if not literal.is_absolute():
+        probes.append(directory / literal)
+        if literal.suffix != ".json":
+            probes.append(directory / f"{token}.json")
+    on_disk = next(
+        (probe for probe in probes if probe.exists() and probe.is_file()),
+        None,
+    )
+    if on_disk is not None:
+        return on_disk
+    prefix_hits = [
+        item.path
+        for item in entries
+        if item.session_id.startswith(token) or item.path.name.startswith(token)
+    ]
+    # Only an unambiguous prefix is accepted.
+    return prefix_hits[0] if len(prefix_hits) == 1 else None
+def _turn_count_from_messages(messages: list[Any]) -> int:
+    """Count the dict entries in ``messages`` whose ``role`` is ``"user"``."""
+    user_turns = [
+        entry
+        for entry in messages
+        if isinstance(entry, dict) and entry.get("role") == "user"
+    ]
+    return len(user_turns)
+def _has_redacted_content(messages: list[Any]) -> bool:
+    """Report whether any message body contains a ``[REDACTED_*]`` marker.
+    String bodies are searched directly; for list bodies, each dict block's
+    ``text`` (or, failing that, ``content``) value is searched. Non-dict
+    messages and non-string values are ignored.
+    """
+    def _candidate_texts(body: Any):
+        # Yield every value of one message body that should be searched.
+        if isinstance(body, str):
+            yield body
+        elif isinstance(body, list):
+            for part in body:
+                if isinstance(part, dict):
+                    yield part.get("text") or part.get("content")
+    return any(
+        isinstance(text, str) and _REDACTED_MARKER.search(text) is not None
+        for entry in messages
+        if isinstance(entry, dict)
+        for text in _candidate_texts(entry.get("content"))
+    )
+def restore_session_from_log(session: Any, path: Path) -> dict[str, Any]:
+    """Replace the active session context with messages from ``path``.
+    When the log's ``user_id`` matches the current session, the saved
+    session id and start time are adopted (continuation); otherwise the
+    caller's session identity is left untouched (fork). In both cases the
+    on-disk save path is cleared, so later saves go to a fresh file rather
+    than overwriting the source log.
+    Args:
+        session: Active session whose context, model and bookkeeping fields
+            are overwritten in place.
+        path: JSON session log to load.
+    Returns:
+        Metadata for the ``resume_complete`` event: ``path``,
+        ``restored_count``, ``dropped_count``, ``model_name``,
+        ``invalid_saved_model``, ``forked`` and ``had_redacted_content``.
+    Raises:
+        ValueError: If the log is not a JSON object, has no ``messages``
+            array, or contains no restorable non-system message.
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file is not valid JSON.
+    """
+    with open(path) as f:
+        data = json.load(f)
+    # A JSON array or scalar would otherwise fail below with an opaque
+    # AttributeError on ``data.get``.
+    if not isinstance(data, dict):
+        raise ValueError("Selected log is not a JSON object")
+    raw_messages = data.get("messages")
+    if not isinstance(raw_messages, list):
+        raise ValueError("Selected log does not contain a messages array")
+    restored_messages: list[Message] = []
+    dropped_count = 0
+    for raw in raw_messages:
+        # Saved system messages are skipped; the live session's own system
+        # message (if any) is kept instead, see below.
+        if not isinstance(raw, dict) or raw.get("role") == "system":
+            continue
+        try:
+            restored_messages.append(Message.model_validate(raw))
+        except Exception as e:
+            dropped_count += 1
+            logger.warning("Dropping malformed message from %s: %s", path, e)
+    if not restored_messages:
+        raise ValueError("Selected log has no restorable non-system messages")
+    # All validation is done; only now is the session mutated.
+    cm = session.context_manager
+    system_msg = cm.items[0] if cm.items and cm.items[0].role == "system" else None
+    cm.items = ([system_msg] if system_msg else []) + restored_messages
+    # Validate the saved model id before switching. ``update_model`` doesn't
+    # check availability; an unrecognised id silently sticks and the next LLM
+    # call fails with a cryptic routing error. Logs from a different
+    # deployment, an older catalog, or a removed model land here.
+    saved_model = data.get("model_name")
+    invalid_saved_model: str | None = None
+    if isinstance(saved_model, str) and saved_model:
+        if is_valid_model_id(saved_model):
+            session.update_model(saved_model)
+        else:
+            invalid_saved_model = saved_model
+            logger.warning(
+                "Saved log model %r failed format validation; keeping %r",
+                saved_model,
+                session.config.model_name,
+            )
+    cm._recompute_usage(session.config.model_name)
+    saved_session_id = data.get("session_id")
+    saved_user_id = data.get("user_id")
+    is_continuation = saved_user_id == session.user_id
+    if is_continuation:
+        if isinstance(saved_session_id, str) and saved_session_id:
+            session.session_id = saved_session_id
+        session.session_start_time = (
+            data.get("session_start_time") or session.session_start_time
+        )
+    # Always fork the on-disk save path. The source log is treated as an
+    # immutable snapshot: ``logged_events`` is reset to a single
+    # ``resumed_from`` marker below for cost accounting, so reusing the
+    # source path would let the next heartbeat save destroy the original
+    # ``llm_call``/event history on disk. The next save will pick a fresh
+    # filename instead.
+    session._local_save_path = None
+    saved_events = data.get("events")
+    saved_event_count = len(saved_events) if isinstance(saved_events, list) else 0
+    session.logged_events = [
+        {
+            "timestamp": datetime.now().isoformat(),
+            "event_type": "resumed_from",
+            "data": {
+                "path": str(path),
+                "original_session_id": (
+                    saved_session_id if isinstance(saved_session_id, str) else None
+                ),
+                "original_event_count": saved_event_count,
+                "forked": not is_continuation,
+            },
+        }
+    ]
+    # Turn count is derived from the raw log (user-role entries), and the
+    # auto-save marker is aligned with it.
+    session.turn_count = _turn_count_from_messages(raw_messages)
+    session.last_auto_save_turn = session.turn_count
+    session.pending_approval = None
+    return {
+        "path": str(path),
+        "restored_count": len(restored_messages),
+        "dropped_count": dropped_count,
+        "model_name": session.config.model_name,
+        "invalid_saved_model": invalid_saved_model,
+        "forked": not is_continuation,
+        "had_redacted_content": _has_redacted_content(raw_messages),
+    }

agent/main.py CHANGED Viewed

@@ -9,6 +9,7 @@ Supports two modes:
 import argparse
 import asyncio
 import json
 import os
 import signal
 import sys
@@ -55,6 +56,7 @@ litellm.drop_params = True
 litellm.suppress_debug_info = True
 CLI_CONFIG_PATH = Path(__file__).parent.parent / "configs" / "cli_agent_config.json"
 def _is_scheduled_hf_job_tool(tool_info: dict[str, Any]) -> bool:
@@ -368,6 +370,46 @@ async def event_listener(
             elif event.event_type == "undo_complete":
                 console.print("[dim]Undone.[/dim]")
                 turn_complete_event.set()
             elif event.event_type == "tool_log":
                 tool = event.data.get("tool", "") if event.data else ""
                 log = event.data.get("log", "") if event.data else ""
@@ -739,12 +781,69 @@ async def get_user_input(prompt_session: PromptSession) -> str:
 # Slash commands are defined in terminal_display
 async def _handle_slash_command(
     cmd: str,
     config,
     session_holder: list,
     submission_queue: asyncio.Queue,
     submission_id: list[int],
 ) -> Submission | None:
     """
     Handle a slash command. Returns a Submission to enqueue, or None if
@@ -775,6 +874,24 @@ async def _handle_slash_command(
             operation=Operation(op_type=OpType.COMPACT),
         )
     if command == "/model":
         console = get_console()
         if not arg:
@@ -1136,6 +1253,7 @@ async def main(model: str | None = None):
                     session_holder,
                     submission_queue,
                     submission_id,
                 )
                 if sub is None:
                     # Command handled locally, loop back for input

 import argparse
 import asyncio
 import json
+import logging
 import os
 import signal
 import sys
 litellm.suppress_debug_info = True
 CLI_CONFIG_PATH = Path(__file__).parent.parent / "configs" / "cli_agent_config.json"
+logger = logging.getLogger(__name__)
 def _is_scheduled_hf_job_tool(tool_info: dict[str, Any]) -> bool:
             elif event.event_type == "undo_complete":
                 console.print("[dim]Undone.[/dim]")
                 turn_complete_event.set()
+            elif event.event_type == "resume_complete":
+                data = event.data or {}
+                path = data.get("path", "?")
+                count = data.get("restored_count", 0)
+                dropped = int(data.get("dropped_count", 0) or 0)
+                model = data.get("model_name", "?")
+                invalid_model = data.get("invalid_saved_model")
+                forked = bool(data.get("forked", False))
+                redacted = bool(data.get("had_redacted_content", False))
+                verb = "Forked from" if forked else "Resumed"
+                console.print(
+                    f"[green]{verb}[/green] {path} "
+                    f"([cyan]{count}[/cyan] messages, "
+                    f"model [cyan]{model}[/cyan])."
+                )
+                if dropped:
+                    console.print(
+                        f"[yellow]Warning:[/yellow] dropped {dropped} "
+                        "malformed message(s) while restoring — surrounding "
+                        "tool-call alignment may be off."
+                    )
+                if invalid_model:
+                    console.print(
+                        f"[yellow]Warning:[/yellow] saved model id "
+                        f"[cyan]{invalid_model}[/cyan] failed validation; "
+                        f"kept current model [cyan]{model}[/cyan]."
+                    )
+                if forked:
+                    console.print(
+                        "[dim]Saved log belongs to a different user — kept "
+                        "current session id; future saves go to a fresh file.[/dim]"
+                    )
+                if redacted:
+                    console.print(
+                        "[yellow]Note:[/yellow] tokens/secrets in restored "
+                        "messages were scrubbed at save time. Your live tokens "
+                        "are used for this session; [REDACTED_*] markers in "
+                        "past messages are not re-injected."
+                    )
+                turn_complete_event.set()
             elif event.event_type == "tool_log":
                 tool = event.data.get("tool", "") if event.data else ""
                 log = event.data.get("log", "") if event.data else ""
 # Slash commands are defined in terminal_display
+async def _resume_picker(
+    arg: str,
+    prompt_session: PromptSession | None,
+) -> Path | None:
+    """Resolve a session log path via ``arg`` or interactive selection.
+    Returns ``None`` if the user cancels, no logs exist, or the argument
+    matches nothing — already prints the explanation in those cases.
+    """
+    from agent.core.session_resume import (
+        format_session_log_entry,
+        list_session_logs,
+        resolve_session_log_arg,
+    )
+    from agent.core.session import DEFAULT_SESSION_LOG_DIR
+    console = get_console()
+    directory = DEFAULT_SESSION_LOG_DIR
+    entries = list_session_logs(directory)
+    if not entries:
+        console.print(f"[yellow]No session logs found in ./{directory}.[/yellow]")
+        return None
+    if arg:
+        selected = resolve_session_log_arg(arg, entries, directory)
+        if selected is None:
+            console.print(f"[bold red]No matching session log:[/bold red] {arg}")
+        return selected
+    console.print()
+    console.print("[bold]Saved sessions[/bold]")
+    for index, entry in enumerate(entries, start=1):
+        console.print(format_session_log_entry(index, entry))
+    console.print()
+    if prompt_session is None:
+        console.print("[yellow]Cannot prompt for a selection here.[/yellow]")
+        return None
+    try:
+        choice = await prompt_session.prompt_async(
+            "Select session number (blank to cancel): "
+        )
+    except (EOFError, KeyboardInterrupt):
+        console.print("[dim]Resume cancelled.[/dim]")
+        return None
+    choice = choice.strip()
+    if not choice:
+        console.print("[dim]Resume cancelled.[/dim]")
+        return None
+    selected = resolve_session_log_arg(choice, entries, directory)
+    if selected is None:
+        console.print(f"[bold red]Invalid selection:[/bold red] {choice}")
+    return selected
 async def _handle_slash_command(
     cmd: str,
     config,
     session_holder: list,
     submission_queue: asyncio.Queue,
     submission_id: list[int],
+    prompt_session: PromptSession | None = None,
 ) -> Submission | None:
     """
     Handle a slash command. Returns a Submission to enqueue, or None if
             operation=Operation(op_type=OpType.COMPACT),
         )
+    if command == "/resume":
+        session = session_holder[0] if session_holder else None
+        if session is None:
+            get_console().print(
+                "[bold red]No active session to restore into.[/bold red]"
+            )
+            return None
+        selected_path = await _resume_picker(arg, prompt_session)
+        if selected_path is None:
+            return None
+        submission_id[0] += 1
+        return Submission(
+            id=f"sub_{submission_id[0]}",
+            operation=Operation(
+                op_type=OpType.RESUME, data={"path": str(selected_path)}
+            ),
+        )
     if command == "/model":
         console = get_console()
         if not arg:
                     session_holder,
                     submission_queue,
                     submission_id,
+                    prompt_session,
                 )
                 if sub is None:
                     # Command handled locally, loop back for input

agent/tools/sandbox_client.py CHANGED Viewed

@@ -65,7 +65,6 @@ MAX_TIMEOUT = 1200
 WAIT_TIMEOUT = 600
 WAIT_INTERVAL = 5
 API_WAIT_TIMEOUT = 180
-HARDWARE_REQUEST_TIMEOUT = 60
 CPU_BASIC_HARDWARE = "cpu-basic"
@@ -78,58 +77,6 @@ def _is_transient_space_visibility_error(error: Exception) -> bool:
     return "Repository Not Found" in message or "404 Client Error" in message
-def _is_transient_space_management_error(error: Exception) -> bool:
-    """Tell whether ``error`` looks like a not-yet-manageable Space failure.
-    True when ``error.response.status_code`` is 401 or 404, or when the
-    error text contains one of the known 401 / 404 / not-found markers.
-    """
-    status = getattr(getattr(error, "response", None), "status_code", None)
-    if status in {401, 404}:
-        return True
-    # No usable status code: fall back to matching the rendered message.
-    text = str(error)
-    markers = ("Repository Not Found", "401 Client Error", "404 Client Error")
-    return any(marker in text for marker in markers)
-def _request_space_hardware_with_retry(
-    api: HfApi,
-    space_id: str,
-    *,
-    hardware: str,
-    sleep_time: int | None,
-    log: Callable[[str], object],
-    check_cancel: Callable[[], object],
-) -> None:
-    """Request hardware, retrying while Hub permissions propagate for a new Space.
-    Calls ``api.request_space_hardware`` until it succeeds. Errors that
-    ``_is_transient_space_management_error`` does not recognise are
-    re-raised immediately. Recognised ones are retried until
-    ``HARDWARE_REQUEST_TIMEOUT`` seconds have elapsed since this function
-    was entered, after which the most recent error is re-raised.
-    ``check_cancel`` is invoked before every attempt and ``log`` receives
-    one progress line per retry.
-    """
-    # The deadline is fixed once, before the first attempt.
-    deadline = time.time() + HARDWARE_REQUEST_TIMEOUT
-    attempt = 0
-    while True:
-        # Presumably raises to abort the wait — confirm against the caller.
-        check_cancel()
-        try:
-            api.request_space_hardware(
-                space_id,
-                hardware=hardware,
-                sleep_time=sleep_time,
-            )
-            return
-        except Exception as e:
-            if not _is_transient_space_management_error(e):
-                raise
-            remaining = deadline - time.time()
-            # Out of time: surface the error from this latest attempt.
-            if remaining <= 0:
-                raise
-            attempt += 1
-            # Prefer the HTTP status in the log line; otherwise name the
-            # exception type.
-            status_code = getattr(getattr(e, "response", None), "status_code", None)
-            status = f"HTTP {status_code}" if status_code else type(e).__name__
-            log(
-                f"  Hardware request not accepted yet ({status}); "
-                f"retrying ({attempt})..."
-            )
-            # Blocking sleep, capped so it never runs past the deadline.
-            time.sleep(min(WAIT_INTERVAL, remaining))
 _DOCKERFILE = """\
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
@@ -679,24 +626,21 @@ class Sandbox:
         _check_cancel()
-        # ``duplicate_space`` already receives the target hardware. The extra
-        # /hardware call is useful for paid tiers, but hosted OAuth tokens can
-        # 401 on that endpoint for a fresh private Space even after duplication
-        # succeeds. Avoid the redundant call for default CPU sandboxes when no
-        # auto-sleep timer is requested; with sleep_time set, the hardware
-        # endpoint is still needed to configure auto-sleep.
-        if hardware == CPU_BASIC_HARDWARE and sleep_time is None:
-            _log(f"Using duplicated Space hardware: {hardware}")
-        else:
-            _request_space_hardware_with_retry(
-                api,
-                space_id,
-                hardware=hardware,
-                sleep_time=sleep_time,
-                log=_log,
-                check_cancel=_check_cancel,
-            )
-            _log(f"Requested hardware: {hardware}")
         # Inject secrets BEFORE uploading server files (which triggers rebuild).
         # Secrets added after a Space is running aren't available until restart,

 WAIT_TIMEOUT = 600
 WAIT_INTERVAL = 5
 API_WAIT_TIMEOUT = 180
 CPU_BASIC_HARDWARE = "cpu-basic"
     return "Repository Not Found" in message or "404 Client Error" in message
 _DOCKERFILE = """\
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
         _check_cancel()
+        # ``duplicate_space`` sends hardware and sleepTimeSeconds in the
+        # initial create request. Avoid a second /hardware call: deployed HF
+        # OAuth tokens can 401 on that endpoint for a just-created private
+        # Space even though duplication itself succeeded. We rely on the
+        # duplicate endpoint to honor sleepTimeSeconds for upgraded hardware;
+        # cpu-basic auto-sleep is fixed by the Hub.
+        _log(f"Using duplicated Space hardware: {hardware}")
+        if sleep_time is not None:
+            if hardware == CPU_BASIC_HARDWARE:
+                _log(
+                    f"Requested duplicated Space sleep time: {sleep_time}s "
+                    "(cpu-basic auto-sleep is fixed by the Hub)"
+                )
+            else:
+                _log(f"Using duplicated Space sleep time: {sleep_time}s")
         # Inject secrets BEFORE uploading server files (which triggers rebuild).
         # Secrets added after a Space is running aren't available until restart,

agent/utils/terminal_display.py CHANGED Viewed

@@ -451,6 +451,7 @@ HELP_TEXT = f"""\
 {_I}  [cyan]/help[/cyan]            Show this help
 {_I}  [cyan]/undo[/cyan]            Undo last turn
 {_I}  [cyan]/compact[/cyan]         Compact context window
 {_I}  [cyan]/model[/cyan] [id]      Show available models or switch
 {_I}  [cyan]/effort[/cyan] [level]  Reasoning effort (minimal|low|medium|high|xhigh|max|off)
 {_I}  [cyan]/yolo[/cyan]            Toggle auto-approve mode

 {_I}  [cyan]/help[/cyan]            Show this help
 {_I}  [cyan]/undo[/cyan]            Undo last turn
 {_I}  [cyan]/compact[/cyan]         Compact context window
+{_I}  [cyan]/resume[/cyan] [index|id|path] Pick up from a log in ./session_logs
 {_I}  [cyan]/model[/cyan] [id]      Show available models or switch
 {_I}  [cyan]/effort[/cyan] [level]  Reasoning effort (minimal|low|medium|high|xhigh|max|off)
 {_I}  [cyan]/yolo[/cyan]            Toggle auto-approve mode

backend/session_manager.py CHANGED Viewed

@@ -12,7 +12,6 @@ from typing import Any, Optional
 from agent.config import load_config
 from agent.core.agent_loop import process_submission
-from agent.core.hub_artifacts import start_session_artifact_collection_task
 from agent.core.session import Event, OpType, Session
 from agent.core.session_persistence import get_session_store
 from agent.core.tools import ToolRouter
@@ -136,7 +135,6 @@ class SessionManager:
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
         self.persistence_store = None
-        self.enable_hub_artifact_collections = True
     async def start(self) -> None:
         """Start shared background resources."""
@@ -413,28 +411,6 @@ class SessionManager:
         session.sandbox_preload_cancel_event = None
         self._start_cpu_sandbox_preload(agent_session)
-    def _start_hub_artifact_collection(self, agent_session: AgentSession) -> None:
-        """Kick off best-effort Hub collection creation for the session.
-        Does nothing when the feature flag is off or no token is available.
-        Failures are logged at debug level and never raised to the caller.
-        """
-        # Feature flag; a missing attribute counts as disabled.
-        if not getattr(self, "enable_hub_artifact_collections", False):
-            return
-        session = agent_session.session
-        # Backfill the runtime session's id from the AgentSession when it is
-        # missing or falsy; a session that rejects the assignment is tolerated.
-        if not getattr(session, "session_id", None):
-            try:
-                session.session_id = agent_session.session_id
-            except Exception:
-                logger.debug("Could not attach session id for Hub artifact collection")
-        # Prefer the AgentSession token, falling back to one on the session.
-        token = agent_session.hf_token or getattr(session, "hf_token", None)
-        if not token:
-            return
-        try:
-            start_session_artifact_collection_task(session, token=token)
-        except Exception as e:
-            logger.debug(
-                "Failed to schedule Hub artifact collection for %s: %s",
-                agent_session.session_id,
-                e,
-            )
     async def _clear_persisted_sandbox_metadata(self, session_id: str) -> None:
         try:
             await self._store().update_session_fields(
@@ -591,7 +567,6 @@ class SessionManager:
                     existing,
                     preload_sandbox=preload_sandbox,
                 )
-                self._start_hub_artifact_collection(existing)
                 return existing
             return None
@@ -613,7 +588,6 @@ class SessionManager:
                     existing,
                     preload_sandbox=preload_sandbox,
                 )
-                self._start_hub_artifact_collection(existing)
                 return existing
             return None
@@ -700,9 +674,7 @@ class SessionManager:
                 hf_token=hf_token,
                 hf_username=hf_username,
             )
-            self._start_hub_artifact_collection(started)
             return started
-        self._start_hub_artifact_collection(agent_session)
         if preload_sandbox:
             self._start_cpu_sandbox_preload(agent_session)
         logger.info("Restored session %s for user %s", session_id, owner or user_id)
@@ -785,7 +757,6 @@ class SessionManager:
             event_queue=event_queue,
             tool_router=tool_router,
         )
-        self._start_hub_artifact_collection(agent_session)
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
         self._start_cpu_sandbox_preload(agent_session)

 from agent.config import load_config
 from agent.core.agent_loop import process_submission
 from agent.core.session import Event, OpType, Session
 from agent.core.session_persistence import get_session_store
 from agent.core.tools import ToolRouter
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
         self.persistence_store = None
     async def start(self) -> None:
         """Start shared background resources."""
         session.sandbox_preload_cancel_event = None
         self._start_cpu_sandbox_preload(agent_session)
     async def _clear_persisted_sandbox_metadata(self, session_id: str) -> None:
         try:
             await self._store().update_session_fields(
                     existing,
                     preload_sandbox=preload_sandbox,
                 )
                 return existing
             return None
                     existing,
                     preload_sandbox=preload_sandbox,
                 )
                 return existing
             return None
                 hf_token=hf_token,
                 hf_username=hf_username,
             )
             return started
         if preload_sandbox:
             self._start_cpu_sandbox_preload(agent_session)
         logger.info("Restored session %s for user %s", session_id, owner or user_id)
             event_queue=event_queue,
             tool_router=tool_router,
         )
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
         self._start_cpu_sandbox_preload(agent_session)

tests/unit/test_hub_artifacts.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import asyncio
 import logging
 from types import SimpleNamespace
@@ -11,12 +10,10 @@ from agent.core.hub_artifacts import (
     artifact_collection_title,
     augment_repo_card_content,
     build_hub_artifact_sitecustomize,
-    ensure_session_artifact_collection,
     is_known_hub_artifact,
     is_sandbox_hub_repo,
     register_hub_artifact,
     remember_hub_artifact,
-    start_session_artifact_collection_task,
     wrap_shell_command_with_hub_artifact_bootstrap,
 )
 from agent.tools import local_tools, sandbox_tool
@@ -207,6 +204,7 @@ def test_register_hub_artifact_retries_after_partial_failure(monkeypatch):
     def add_to_collection(*args, **kwargs):
         nonlocal collection_attempts
         collection_attempts += 1
     monkeypatch.setattr(
         hub_artifacts,
@@ -238,6 +236,7 @@ def test_register_hub_artifact_retries_after_collection_failure(monkeypatch):
         collection_attempts += 1
         if collection_attempts == 1:
             raise RuntimeError("temporary collection failure")
     monkeypatch.setattr(hub_artifacts, "_update_repo_card", update_repo_card)
     monkeypatch.setattr(
@@ -271,63 +270,6 @@ def test_session_artifact_set_falls_back_when_session_rejects_attrs(caplog):
     assert "using process-local fallback state" in caplog.text
-@pytest.mark.asyncio
-async def test_ensure_session_artifact_collection_uses_user_token(monkeypatch):
-    """The supplied token reaches both the ``HfApi`` constructor and the slug helper."""
-    session = _session()
-    calls = []
-    # Minimal stand-in that only records the token it was built with.
-    class FakeApi:
-        def __init__(self, token):
-            self.token = token
-    def fake_ensure_collection_slug(api, seen_session, **kwargs):
-        calls.append((api.token, seen_session, kwargs))
-        return "alice/ml-intern-artifacts"
-    monkeypatch.setattr(hub_artifacts, "HfApi", FakeApi)
-    monkeypatch.setattr(
-        hub_artifacts,
-        "_ensure_collection_slug",
-        fake_ensure_collection_slug,
-    )
-    slug = await ensure_session_artifact_collection(session, token="hf-token")
-    assert slug == "alice/ml-intern-artifacts"
-    # Exactly one helper call, carrying the same token in both places.
-    assert calls == [
-        ("hf-token", session, {"token": "hf-token"}),
-    ]
-@pytest.mark.asyncio
-async def test_start_session_artifact_collection_task_dedupes(monkeypatch):
-    """Starting the task twice for one session yields the same task and one call."""
-    session = _session()
-    calls = []
-    async def fake_ensure_session_artifact_collection(seen_session, **kwargs):
-        calls.append((seen_session, kwargs))
-        # Yield once so the task is still pending when it is requested again.
-        await asyncio.sleep(0)
-        return "alice/ml-intern-artifacts"
-    monkeypatch.setattr(
-        hub_artifacts,
-        "ensure_session_artifact_collection",
-        fake_ensure_session_artifact_collection,
-    )
-    task = start_session_artifact_collection_task(session, token="hf-token")
-    second = start_session_artifact_collection_task(session, token="hf-token")
-    assert task is not None
-    assert second is task
-    await task
-    assert calls == [(session, {"token": "hf-token"})]
-def test_start_session_artifact_collection_task_skips_without_token():
-    """Without a token argument no task is created and ``None`` is returned."""
-    assert start_session_artifact_collection_task(_session()) is None
 @pytest.mark.asyncio
 async def test_hf_repo_git_create_repo_registers_artifact(monkeypatch):
     session = _session()
@@ -535,6 +477,78 @@ def test_sitecustomize_bootstrap_reuses_existing_collection_slug():
     )
 def test_sitecustomize_skips_sandbox_space_registration(monkeypatch):
     import huggingface_hub as hub
     from huggingface_hub import HfApi

 import logging
 from types import SimpleNamespace
     artifact_collection_title,
     augment_repo_card_content,
     build_hub_artifact_sitecustomize,
     is_known_hub_artifact,
     is_sandbox_hub_repo,
     register_hub_artifact,
     remember_hub_artifact,
     wrap_shell_command_with_hub_artifact_bootstrap,
 )
 from agent.tools import local_tools, sandbox_tool
     def add_to_collection(*args, **kwargs):
         nonlocal collection_attempts
         collection_attempts += 1
+        return True
     monkeypatch.setattr(
         hub_artifacts,
         collection_attempts += 1
         if collection_attempts == 1:
             raise RuntimeError("temporary collection failure")
+        return True
     monkeypatch.setattr(hub_artifacts, "_update_repo_card", update_repo_card)
     monkeypatch.setattr(
     assert "using process-local fallback state" in caplog.text
 @pytest.mark.asyncio
 async def test_hf_repo_git_create_repo_registers_artifact(monkeypatch):
     session = _session()
     )
+def test_sitecustomize_caches_lazy_collection_slug_across_bootstraps(
+    monkeypatch,
+    tmp_path,
+):
+    """A collection created by one bootstrap is reused by the next via the cache file.
+    The generated sitecustomize code is executed twice, each time on top of
+    freshly patched ``HfApi`` fakes. Only one collection may be created,
+    while both uploaded repos are added to it.
+    """
+    import huggingface_hub as hub
+    from huggingface_hub import HfApi
+    readme_path = tmp_path / "README.md"
+    readme_path.write_text("# Existing Model\n", encoding="utf-8")
+    cache_path = tmp_path / "collection-slug.txt"
+    collection_slug = "alice/ml-intern-artifacts-2026-05-05-session-123"
+    # Call recorders for each faked Hub operation.
+    uploads = []
+    downloads = []
+    collection_creates = []
+    collection_items = []
+    def fake_upload_file(self, **kwargs):
+        uploads.append(kwargs)
+        return SimpleNamespace()
+    def fake_hf_hub_download(*args, **kwargs):
+        downloads.append((args, kwargs))
+        return str(readme_path)
+    def fake_create_collection(self, **kwargs):
+        collection_creates.append(kwargs)
+        return SimpleNamespace(slug=collection_slug)
+    def fake_add_collection_item(self, **kwargs):
+        collection_items.append(kwargs)
+    # Point the generated code's slug cache at a temp file.
+    monkeypatch.setenv("ML_INTERN_ARTIFACT_COLLECTION_CACHE", str(cache_path))
+    code = build_hub_artifact_sitecustomize(_session())
+    def install_fresh_bootstrap():
+        # Reset the fakes, then run the generated code in an empty namespace
+        # so no in-memory state carries over between bootstraps.
+        monkeypatch.setattr(HfApi, "upload_file", fake_upload_file)
+        monkeypatch.setattr(HfApi, "create_collection", fake_create_collection)
+        monkeypatch.setattr(HfApi, "add_collection_item", fake_add_collection_item)
+        monkeypatch.setattr(hub, "hf_hub_download", fake_hf_hub_download)
+        exec(code, {})
+        # The bootstrap must have replaced upload_file with its own wrapper.
+        assert HfApi.upload_file is not fake_upload_file
+    install_fresh_bootstrap()
+    HfApi(token="hf-token").upload_file(
+        path_or_fileobj=b"weights",
+        path_in_repo="model.safetensors",
+        repo_id="alice/model-a",
+        repo_type="model",
+        token="hf-token",
+    )
+    install_fresh_bootstrap()
+    HfApi(token="hf-token").upload_file(
+        path_or_fileobj=b"weights",
+        path_in_repo="model.safetensors",
+        repo_id="alice/model-b",
+        repo_type="model",
+        token="hf-token",
+    )
+    assert cache_path.read_text(encoding="utf-8") == collection_slug
+    # Second bootstrap reused the cached slug instead of creating a collection.
+    assert len(collection_creates) == 1
+    assert [item["item_id"] for item in collection_items] == [
+        "alice/model-a",
+        "alice/model-b",
+    ]
+    assert [download[1]["repo_id"] for download in downloads] == [
+        "alice/model-a",
+        "alice/model-b",
+    ]
 def test_sitecustomize_skips_sandbox_space_registration(monkeypatch):
     import huggingface_hub as hub
     from huggingface_hub import HfApi

tests/unit/test_sandbox_private_spaces.py CHANGED Viewed

@@ -3,8 +3,6 @@ import threading
 import time
 from types import SimpleNamespace
-import pytest
 from agent.core import telemetry
 from agent.tools import sandbox_client, sandbox_tool
 from agent.tools.sandbox_client import Sandbox
@@ -17,6 +15,7 @@ def _fail_metadata_update(*args, **kwargs):
 def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
     duplicate_kwargs = {}
     requested_hardware = []
     class FakeApi:
@@ -44,11 +43,12 @@ def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
     )
     monkeypatch.setattr(Sandbox, "_wait_for_api", lambda self, *args, **kwargs: None)
-    Sandbox.create(owner="alice", token="hf-token", log=lambda msg: None)
     assert duplicate_kwargs["private"] is True
     assert duplicate_kwargs["hardware"] == "cpu-basic"
     assert requested_hardware == []
 def test_sandbox_client_retries_transient_runtime_404(monkeypatch):
@@ -98,32 +98,20 @@ def test_sandbox_client_retries_transient_runtime_404(monkeypatch):
     assert runtime_calls == 2
-def test_sandbox_client_retries_transient_hardware_401(monkeypatch):
-    hardware_calls = 0
     logs: list[str] = []
-    class FakeResponse:
-        status_code = 401
-    class FakeHardware401(Exception):
-        response = FakeResponse()
-        def __str__(self):
-            return "401 Client Error: Repository Not Found"
     class FakeApi:
         def __init__(self, token=None):
             self.token = token
         def duplicate_space(self, **kwargs):
-            pass
         def request_space_hardware(self, space_id, hardware, sleep_time=None):
-            nonlocal hardware_calls
-            hardware_calls += 1
-            if hardware_calls == 1:
-                raise FakeHardware401()
-            return SimpleNamespace(stage="BUILDING", hardware=None)
         def add_space_secret(self, *args, **kwargs):
             pass
@@ -144,58 +132,62 @@ def test_sandbox_client_retries_transient_hardware_401(monkeypatch):
         owner="alice",
         token="hf-token",
         hardware="t4-small",
         log=logs.append,
     )
     assert sandbox.space_id.startswith("alice/sandbox-")
-    assert hardware_calls == 2
-    assert any("Hardware request not accepted yet (HTTP 401)" in log for log in logs)
-def test_sandbox_hardware_retry_reraises_after_timeout(monkeypatch):
-    """Once the deadline passes, the retry helper re-raises the latest error."""
-    calls = 0
     logs: list[str] = []
-    sleeps: list[float] = []
-    class FakeResponse:
-        status_code = 401
-    class FakeHardware401(Exception):
-        response = FakeResponse()
-        def __str__(self):
-            return "401 Client Error: Repository Not Found"
-    first_error = FakeHardware401("first")
-    timeout_error = FakeHardware401("timeout")
-    class FakeApi:
-        def request_space_hardware(self, space_id, hardware, sleep_time=None):
-            nonlocal calls
-            calls += 1
-            if calls == 1:
-                raise first_error
-            raise timeout_error
-    # One clock reading per time.time() call in the helper: deadline setup,
-    # the check after the first failure (still in time), and the check after
-    # the second failure (past the 60s HARDWARE_REQUEST_TIMEOUT).
-    timestamps = iter([100.0, 100.0, 161.0])
-    monkeypatch.setattr(sandbox_client.time, "time", lambda: next(timestamps))
-    monkeypatch.setattr(sandbox_client.time, "sleep", sleeps.append)
-    with pytest.raises(FakeHardware401) as excinfo:
-        sandbox_client._request_space_hardware_with_retry(
-            FakeApi(),
-            "alice/sandbox-12345678",
-            hardware="cpu-basic",
-            sleep_time=None,
-            log=logs.append,
-            check_cancel=lambda: None,
-        )
-    # The error from the second attempt is raised, not the first one.
-    assert excinfo.value is timeout_error
-    assert calls == 2
-    assert sleeps == [sandbox_client.WAIT_INTERVAL]
-    assert len(logs) == 1
 def test_sandbox_tool_forces_private_spaces(monkeypatch):

 import time
 from types import SimpleNamespace
 from agent.core import telemetry
 from agent.tools import sandbox_client, sandbox_tool
 from agent.tools.sandbox_client import Sandbox
 def test_sandbox_client_defaults_to_private_spaces(monkeypatch):
     duplicate_kwargs = {}
+    logs: list[str] = []
     requested_hardware = []
     class FakeApi:
     )
     monkeypatch.setattr(Sandbox, "_wait_for_api", lambda self, *args, **kwargs: None)
+    Sandbox.create(owner="alice", token="hf-token", log=logs.append)
     assert duplicate_kwargs["private"] is True
     assert duplicate_kwargs["hardware"] == "cpu-basic"
     assert requested_hardware == []
+    assert not any("sleep time" in log for log in logs)
 def test_sandbox_client_retries_transient_runtime_404(monkeypatch):
     assert runtime_calls == 2
+def test_sandbox_client_configures_gpu_at_duplication(monkeypatch):
+    duplicate_kwargs = {}
     logs: list[str] = []
+    requested_hardware = []
     class FakeApi:
         def __init__(self, token=None):
             self.token = token
         def duplicate_space(self, **kwargs):
+            duplicate_kwargs.update(kwargs)
         def request_space_hardware(self, space_id, hardware, sleep_time=None):
+            requested_hardware.append((space_id, hardware, sleep_time))
         def add_space_secret(self, *args, **kwargs):
             pass
         owner="alice",
         token="hf-token",
         hardware="t4-small",
+        sleep_time=2700,
         log=logs.append,
     )
     assert sandbox.space_id.startswith("alice/sandbox-")
+    assert duplicate_kwargs["hardware"] == "t4-small"
+    assert duplicate_kwargs["sleep_time"] == 2700
+    assert requested_hardware == []
+    assert "Using duplicated Space hardware: t4-small" in logs
+    assert "Using duplicated Space sleep time: 2700s" in logs
+def test_sandbox_client_logs_cpu_sleep_time_as_hub_fixed(monkeypatch):
+    """With default hardware, a sleep time is passed to duplication and logged as Hub-fixed.
+    Also checks that ``request_space_hardware`` is never called.
+    """
+    duplicate_kwargs = {}
     logs: list[str] = []
+    requested_hardware = []
+    class FakeApi:
+        def __init__(self, token=None):
+            self.token = token
+        def duplicate_space(self, **kwargs):
+            duplicate_kwargs.update(kwargs)
+        def request_space_hardware(self, space_id, hardware, sleep_time=None):
+            # Recorded only so the test can assert it was never called.
+            requested_hardware.append((space_id, hardware, sleep_time))
+        def add_space_secret(self, *args, **kwargs):
+            pass
+        def get_space_runtime(self, space_id):
+            return SimpleNamespace(stage="RUNNING", hardware="cpu-basic")
+    monkeypatch.setattr(sandbox_client, "HfApi", FakeApi)
+    # Stub out server setup and the API wait.
+    monkeypatch.setattr(
+        Sandbox,
+        "_setup_server",
+        staticmethod(lambda *args, **kwargs: None),
+    )
+    monkeypatch.setattr(Sandbox, "_wait_for_api", lambda self, *args, **kwargs: None)
+    # No hardware argument: the assertions below expect "cpu-basic".
+    Sandbox.create(
+        owner="alice",
+        token="hf-token",
+        sleep_time=2700,
+        log=logs.append,
+    )
+    assert duplicate_kwargs["hardware"] == "cpu-basic"
+    assert duplicate_kwargs["sleep_time"] == 2700
+    assert requested_hardware == []
+    assert "Using duplicated Space hardware: cpu-basic" in logs
+    assert (
+        "Requested duplicated Space sleep time: 2700s "
+        "(cpu-basic auto-sleep is fixed by the Hub)"
+    ) in logs
 def test_sandbox_tool_forces_private_spaces(monkeypatch):

tests/unit/test_session_manager_persistence.py CHANGED Viewed

@@ -425,32 +425,9 @@ async def test_create_session_schedules_cpu_sandbox_preload():
         assert scheduled == [session_id]
         assert session_id in manager.sessions
-    finally:
-        stop.set()
-        await _cancel_runtime_tasks(manager)
-@pytest.mark.asyncio
-async def test_create_session_starts_hub_artifact_collection(monkeypatch):
-    manager = _manager_with_store(NoopSessionStore())
-    manager.enable_hub_artifact_collections = True
-    stop = _install_fake_runtime(manager)
-    started: list[tuple[str, str]] = []
-    def fake_start_session_artifact_collection_task(session, **kwargs):
-        started.append((session.session_id, kwargs["token"]))
-        return None
-    monkeypatch.setattr(
-        "session_manager.start_session_artifact_collection_task",
-        fake_start_session_artifact_collection_task,
-    )
-    manager._start_cpu_sandbox_preload = lambda _: None  # type: ignore[method-assign]
-    try:
-        session_id = await manager.create_session(user_id="owner", hf_token="token")
-        assert started == [(session_id, "token")]
     finally:
         stop.set()
         await _cancel_runtime_tasks(manager)
@@ -475,37 +452,8 @@ async def test_lazy_restore_schedules_cpu_sandbox_preload():
         assert restored is not None
         assert scheduled == ["persisted-session"]
         assert "persisted-session" in manager.sessions
-    finally:
-        stop.set()
-        await _cancel_runtime_tasks(manager)
-@pytest.mark.asyncio
-async def test_lazy_restore_starts_hub_artifact_collection(monkeypatch):
-    manager = _manager_with_store(RestoreStore())
-    manager.enable_hub_artifact_collections = True
-    stop = _install_fake_runtime(manager)
-    started: list[tuple[str, str]] = []
-    def fake_start_session_artifact_collection_task(session, **kwargs):
-        started.append((session.session_id, kwargs["token"]))
-        return None
-    monkeypatch.setattr(
-        "session_manager.start_session_artifact_collection_task",
-        fake_start_session_artifact_collection_task,
-    )
-    manager._start_cpu_sandbox_preload = lambda _: None  # type: ignore[method-assign]
-    try:
-        restored = await manager.ensure_session_loaded(
-            "persisted-session",
-            user_id="owner",
-            hf_token="token",
-        )
-        assert restored is not None
-        assert started == [("persisted-session", "token")]
     finally:
         stop.set()
         await _cancel_runtime_tasks(manager)

         assert scheduled == [session_id]
         assert session_id in manager.sessions
+        runtime_session = manager.sessions[session_id].session
+        assert not hasattr(runtime_session, "_ml_intern_artifact_collection_task")
+        assert not hasattr(runtime_session, "_ml_intern_artifact_collection_slug")
     finally:
         stop.set()
         await _cancel_runtime_tasks(manager)
         assert restored is not None
         assert scheduled == ["persisted-session"]
         assert "persisted-session" in manager.sessions
+        assert not hasattr(restored.session, "_ml_intern_artifact_collection_task")
+        assert not hasattr(restored.session, "_ml_intern_artifact_collection_slug")
     finally:
         stop.set()
         await _cancel_runtime_tasks(manager)

tests/unit/test_session_resume.py ADDED Viewed

	@@ -0,0 +1,382 @@

+"""Tests for ``agent.core.session_resume``."""
+import json
+import os
+import time
+from pathlib import Path
+from types import SimpleNamespace
+from litellm import Message
+from agent.core import session_resume
+def _write_session_log(
+    directory: Path,
+    name: str,
+    *,
+    session_id: str,
+    content: str,
+    mtime: float,
+    user_id: str | None = "user-a",
+    extra_messages: list[dict] | None = None,
+    events: list[dict] | None = None,
+) -> Path:
+    """Write a session-log JSON fixture into ``directory`` and return its path.
+    The log always starts with an "old system" message followed by one user
+    message holding ``content``; ``extra_messages`` are appended after those.
+    When ``events`` is ``None`` a single ``turn_complete`` event is recorded.
+    Both the access and modification times of the file are set to ``mtime``.
+    """
+    directory.mkdir(exist_ok=True)
+    messages = [
+        {"role": "system", "content": "old system"},
+        {"role": "user", "content": content},
+    ]
+    if extra_messages:
+        messages.extend(extra_messages)
+    recorded_events = events
+    if recorded_events is None:
+        recorded_events = [{"event_type": "turn_complete", "data": {}}]
+    log_path = directory / name
+    serialized = json.dumps(
+        {
+            "session_id": session_id,
+            "user_id": user_id,
+            "session_start_time": "2026-01-01T00:00:00",
+            "session_end_time": "2026-01-01T00:05:00",
+            "model_name": "openai/gpt-5.5",
+            "messages": messages,
+            "events": recorded_events,
+        }
+    )
+    log_path.write_text(serialized)
+    # Pin atime and mtime so listing order in the tests is deterministic.
+    os.utime(log_path, (mtime, mtime))
+    return log_path
+class _FakeContext:
+    """In-memory stand-in for the context manager held by ``_FakeSession``.
+    Starts with a single "current system" message and records every model
+    name handed to ``_recompute_usage``.
+    """
+    def __init__(self) -> None:
+        self.recompute_calls: list[str] = []
+        self.running_context_usage = 0
+        self.items = [Message(role="system", content="current system")]
+    def _recompute_usage(self, model_name: str) -> None:
+        """Record the call and set usage to the fixed value 123."""
+        self.running_context_usage = 123
+        self.recompute_calls.append(model_name)
+class _FakeSession:
+    """Minimal session double exposing the attributes these tests read or set."""
+    def __init__(self, *, user_id: str | None = "user-a") -> None:
+        # Identity of the "live" session before any restore happens.
+        self.session_id = "current-session"
+        self.session_start_time = "2026-01-02T00:00:00"
+        self.user_id = user_id
+        self.config = SimpleNamespace(model_name="moonshotai/Kimi-K2.6")
+        self.context_manager = _FakeContext()
+        # Bookkeeping that starts out empty or zeroed.
+        self.logged_events: list[dict] = []
+        self._local_save_path: str | None = None
+        self.turn_count = 0
+        self.last_auto_save_turn = 0
+        # Starts non-empty; the matching-user restore test asserts it ends up None.
+        self.pending_approval: dict | None = {"tool_calls": ["pending"]}
+    def update_model(self, model_name: str) -> None:
+        """Store ``model_name`` on the fake config."""
+        self.config.model_name = model_name
+def test_session_log_listing_newest_first(tmp_path):
+    """Two logs written with mtimes 10 seconds apart are listed newest first."""
+    log_dir = tmp_path / "session_logs"
+    stale_log = _write_session_log(
+        log_dir,
+        "older.json",
+        session_id="older-session",
+        content="older prompt",
+        mtime=time.time() - 10,
+    )
+    fresh_log = _write_session_log(
+        log_dir,
+        "newer.json",
+        session_id="newer-session",
+        content="newer prompt",
+        mtime=time.time(),
+    )
+    listing = session_resume.list_session_logs(log_dir)
+    listed_paths = [entry.path for entry in listing]
+    assert listed_paths == [fresh_log, stale_log]
+    # The head entry must carry the newer log's id and prompt preview.
+    latest = listing[0]
+    assert latest.session_id == "newer-session"
+    assert latest.preview == "newer prompt"
+def test_restore_continues_when_user_id_matches(tmp_path):
+    """Restoring a log saved by the same user continues the saved session."""
+    saved_log = _write_session_log(
+        tmp_path / "session_logs",
+        "session.json",
+        session_id="saved-session",
+        content="continue this work",
+        mtime=time.time(),
+        user_id="user-a",
+    )
+    session = _FakeSession(user_id="user-a")
+    outcome = session_resume.restore_session_from_log(session, saved_log)
+    # Summary handed back to the caller.
+    assert outcome["restored_count"] == 1
+    assert outcome["dropped_count"] == 0
+    assert outcome["forked"] is False
+    assert outcome["model_name"] == "openai/gpt-5.5"
+    assert outcome["had_redacted_content"] is False
+    assert outcome["invalid_saved_model"] is None
+    # The live session adopts the saved model and identity.
+    assert session.config.model_name == "openai/gpt-5.5"
+    assert session.session_id == "saved-session"
+    # The source log path is never reused: later heartbeat saves go to a
+    # fresh file so the snapshot stays intact (regression covered by the
+    # source-log round-trip test in this module).
+    assert session._local_save_path is None
+    assert session.turn_count == 1
+    assert session.last_auto_save_turn == 1
+    assert session.pending_approval is None
+    # The current system prompt is kept; only the saved user turn is added.
+    context = session.context_manager
+    roles = [msg.role for msg in context.items]
+    assert roles == ["system", "user"]
+    assert context.items[0].content == "current system"
+    assert context.items[1].content == "continue this work"
+    assert context.running_context_usage == 123
+    assert context.recompute_calls == ["openai/gpt-5.5"]
+    # Exactly one marker event records where the session came from.
+    assert len(session.logged_events) == 1
+    marker = session.logged_events[0]
+    assert marker["event_type"] == "resumed_from"
+    marker_data = marker["data"]
+    assert marker_data["forked"] is False
+    assert marker_data["original_session_id"] == "saved-session"
+    assert marker_data["original_event_count"] == 1
+def test_restore_forks_when_user_id_differs(tmp_path):
+    """A log saved by another user is forked: the live session keeps its identity."""
+    saved_log = _write_session_log(
+        tmp_path / "session_logs",
+        "session.json",
+        session_id="saved-session",
+        content="someone else's chat",
+        mtime=time.time(),
+        user_id="user-a",
+    )
+    session = _FakeSession(user_id="user-b")
+    # Capture identity before the restore so it can be compared afterwards.
+    id_before = session.session_id
+    start_before = session.session_start_time
+    outcome = session_resume.restore_session_from_log(session, saved_log)
+    assert outcome["forked"] is True
+    assert session.session_id == id_before
+    assert session.session_start_time == start_before
+    assert session._local_save_path is None
+    marker = session.logged_events[0]
+    assert marker["event_type"] == "resumed_from"
+    marker_data = marker["data"]
+    assert marker_data["forked"] is True
+    assert marker_data["original_session_id"] == "saved-session"
+def test_restore_forks_when_one_side_is_anonymous(tmp_path):
+    """A log saved without a user id is forked when the live session has one."""
+    anonymous_log = _write_session_log(
+        tmp_path / "session_logs",
+        "session.json",
+        session_id="saved-session",
+        content="anonymous save",
+        mtime=time.time(),
+        user_id=None,
+    )
+    session = _FakeSession(user_id="user-a")
+    outcome = session_resume.restore_session_from_log(session, anonymous_log)
+    assert outcome["forked"] is True
+    assert session._local_save_path is None
+def test_restore_continues_when_both_sides_anonymous(tmp_path):
+    """With no user id on either side the saved session is continued, not forked."""
+    anonymous_log = _write_session_log(
+        tmp_path / "session_logs",
+        "session.json",
+        session_id="saved-session",
+        content="local-only chat",
+        mtime=time.time(),
+        user_id=None,
+    )
+    session = _FakeSession(user_id=None)
+    outcome = session_resume.restore_session_from_log(session, anonymous_log)
+    assert outcome["forked"] is False
+    assert session.session_id == "saved-session"
+    assert session._local_save_path is None
+def test_restore_rejects_invalid_saved_model(tmp_path):
+    """An unusable saved model id is reported and the current model is kept."""
+    log_dir = tmp_path / "session_logs"
+    log_dir.mkdir()
+    saved_payload = {
+        "session_id": "saved",
+        "user_id": "user-a",
+        "model_name": "not a real id with spaces",
+        "messages": [{"role": "user", "content": "hello"}],
+        "events": [],
+    }
+    log_path = log_dir / "session.json"
+    log_path.write_text(json.dumps(saved_payload))
+    session = _FakeSession(user_id="user-a")
+    model_before = session.config.model_name
+    outcome = session_resume.restore_session_from_log(session, log_path)
+    # The bad id is echoed back, while both the result and the session
+    # still name the model that was active before the restore.
+    assert outcome["invalid_saved_model"] == "not a real id with spaces"
+    assert outcome["model_name"] == model_before
+    assert session.config.model_name == model_before
+def test_restore_counts_dropped_messages(tmp_path):
+    """One valid and one malformed saved message yield one restored, one dropped."""
+    log_dir = tmp_path / "session_logs"
+    log_dir.mkdir()
+    saved_messages = [
+        {"role": "user", "content": "hi"},
+        {"role": "user", "content": 12345},  # invalid content type
+    ]
+    saved_payload = {
+        "session_id": "saved",
+        "user_id": "user-a",
+        "model_name": "openai/gpt-5.5",
+        "messages": saved_messages,
+        "events": [],
+    }
+    log_path = log_dir / "session.json"
+    log_path.write_text(json.dumps(saved_payload))
+    session = _FakeSession(user_id="user-a")
+    outcome = session_resume.restore_session_from_log(session, log_path)
+    assert outcome["restored_count"] == 1
+    assert outcome["dropped_count"] == 1
+def test_restore_does_not_overwrite_source_log_on_save(tmp_path, monkeypatch):
+    """Regression: resuming + saving must not destroy the source log on disk.
+    Without the always-fork ``_local_save_path`` reset, the next heartbeat
+    save would rewrite the source file with ``events=[resumed_from]`` and
+    ``total_cost_usd=0``, wiping the original audit trail. This builds a
+    real ``Session`` and exercises the round-trip.
+    """
+    # Make tmp_path the working directory for the rest of the test.
+    # NOTE(review): presumably so any relative paths used while saving stay
+    # inside the temp dir -- confirm against Session.save_trajectory_local.
+    monkeypatch.chdir(tmp_path)
+    # Project classes are imported inside the test, after the chdir.
+    from agent.context_manager.manager import ContextManager
+    from agent.core.session import Session
+    log_dir = tmp_path / "session_logs"
+    log_dir.mkdir()
+    src_path = log_dir / "src.json"
+    # Source log with two events, including a non-zero cost entry.
+    src_payload = {
+        "session_id": "saved-session",
+        "user_id": "user-a",
+        "session_start_time": "2026-01-01T00:00:00",
+        "session_end_time": "2026-01-01T00:05:00",
+        "model_name": "openai/gpt-5.5",
+        "messages": [
+            {"role": "system", "content": "old system"},
+            {"role": "user", "content": "earlier work"},
+        ],
+        "events": [
+            {"event_type": "llm_call", "data": {"cost_usd": 0.42}},
+            {"event_type": "turn_complete", "data": {}},
+        ],
+    }
+    src_path.write_text(json.dumps(src_payload, indent=2))
+    # Snapshot the exact bytes so the final assertion can prove the file
+    # was left untouched.
+    src_bytes_before = src_path.read_bytes()
+    # Hand-written config stand-in passed to Session below.
+    # NOTE(review): the attribute set presumably mirrors what Session reads
+    # from its config -- confirm against agent.core.session.
+    class _Cfg:
+        model_name = "openai/gpt-5.5"
+        save_sessions = True
+        session_dataset_repo = None
+        auto_save_interval = 1
+        heartbeat_interval_s = 60
+        max_iterations = 10
+        yolo_mode = False
+        confirm_cpu_jobs = False
+        auto_file_upload = False
+        reasoning_effort = None
+        share_traces = False
+        personal_trace_repo_template = None
+        mcpServers: dict = {}
+    # Allocate the ContextManager without running __init__, then set its
+    # attributes by hand.
+    cm = ContextManager.__new__(ContextManager)
+    cm.items = [Message(role="system", content="current system")]
+    cm.tool_specs = []
+    cm.model_max_tokens = 200_000
+    cm.running_context_usage = 0
+    cm.compact_size = 0.1
+    cm.untouched_messages = 5
+    cm.hf_token = None
+    cm.local_mode = True
+    cm.system_prompt = "current system"
+    cm.on_message_added = None
+    import asyncio as _asyncio
+    session = Session(
+        event_queue=_asyncio.Queue(),
+        config=_Cfg(),
+        tool_router=None,
+        context_manager=cm,
+        hf_token=None,
+        user_id="user-a",
+        local_mode=True,
+    )
+    session_resume.restore_session_from_log(session, src_path)
+    # After the restore the session must not point at the source file.
+    assert session._local_save_path is None
+    saved_path = session.save_trajectory_local(directory=str(log_dir))
+    # The save must produce a file, at a different path, and leave the
+    # source log byte-for-byte unchanged.
+    assert saved_path is not None
+    assert Path(saved_path) != src_path
+    assert src_path.read_bytes() == src_bytes_before
+def test_restore_flags_redacted_messages(tmp_path):
+    """A saved message containing a redaction placeholder is flagged in the result."""
+    redacted_log = _write_session_log(
+        tmp_path / "session_logs",
+        "session.json",
+        session_id="saved-session",
+        content="my token is [REDACTED_HF_TOKEN]",
+        mtime=time.time(),
+        user_id="user-a",
+    )
+    session = _FakeSession(user_id="user-a")
+    outcome = session_resume.restore_session_from_log(session, redacted_log)
+    assert outcome["had_redacted_content"] is True
+def test_resolve_session_log_arg_accepts_index_and_id_prefix(tmp_path):
+    """Check ``resolve_session_log_arg`` with an index, an id prefix and a miss.
+    The string ``"1"`` is expected to select the newest log, ``"abc"`` the
+    log whose session id starts with it, and ``"nope"`` to yield ``None``.
+    """
+    log_dir = tmp_path / "session_logs"
+    older = _write_session_log(
+        log_dir,
+        "older.json",
+        session_id="abcdef-older",
+        content="x",
+        mtime=time.time() - 10,
+    )
+    newer = _write_session_log(
+        log_dir,
+        "newer.json",
+        session_id="123456-newer",
+        content="y",
+        mtime=time.time(),
+    )
+    entries = session_resume.list_session_logs(log_dir)
+    assert session_resume.resolve_session_log_arg("1", entries, log_dir) == newer
+    assert session_resume.resolve_session_log_arg("abc", entries, log_dir) == older
+    assert session_resume.resolve_session_log_arg("nope", entries, log_dir) is None