ml-intern

Sleeping

App Files Files Community

Darshan Thakare

lewtun HF Staff Codex committed on May 1

Commit

0bd7547

unverified ·

1 Parent(s): 1b922dd

Auto-share session traces to a private HF dataset (#193)

Browse files

* feat: add share_traces toggle and per-user trace repo template

* feat: support Claude Code JSONL format and per-target auth

* feat: dual-upload sessions to private user trace dataset

* chore: retry personal trace uploads on startup

* feat: add /share-traces command to flip dataset visibility

* docs: document HF trace auto-share and /share-traces

* Use HF token owner for local dev auth

Co-authored-by: Codex <codex@openai.com>

* Rename personal session trace dataset

Co-authored-by: Codex <codex@openai.com>

* Add session dataset card metadata

Co-authored-by: Codex <codex@openai.com>

* Fix session trace upload review issues

Co-authored-by: OpenAI Codex <codex@openai.com>

* Preserve secret scrubbing before trace uploads

Co-authored-by: OpenAI Codex <codex@openai.com>

* Link ML Intern demo in dataset card

Co-authored-by: OpenAI Codex <codex@openai.com>

---------

Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>
Co-authored-by: Codex <codex@openai.com>

Files changed (13) hide show

README.md +35 -0
agent/config.py +7 -0
agent/context_manager/manager.py +8 -1
agent/core/agent_loop.py +6 -2
agent/core/session.py +138 -26
agent/core/session_uploader.py +541 -116
agent/main.py +110 -0
agent/utils/terminal_display.py +1 -0
backend/dependencies.py +34 -2
configs/cli_agent_config.json +2 -0
configs/frontend_agent_config.json +2 -0
tests/unit/test_dangling_tool_calls.py +10 -0
tests/unit/test_session_uploader.py +202 -0

README.md CHANGED Viewed

@@ -56,6 +56,41 @@ ml-intern --max-iterations 100 "your prompt"
 ml-intern --no-stream "your prompt"
 ```
 ## Supported Gateways
 ML Intern currently supports one-way notification gateways from CLI sessions.

 ml-intern --no-stream "your prompt"
 ```
+## Sharing Traces
+Every session is auto-uploaded to your **own private Hugging Face dataset**
+in [Claude Code JSONL format](https://huggingface.co/changelog/agent-trace-viewer),
+which the HF Agent Trace Viewer auto-detects so you can browse turns, tool
+calls, and model responses directly on the Hub.
+By default the dataset is named `{your-hf-username}/ml-intern-sessions` and is
+**created private**. You can flip it to public from inside the CLI:
+```bash
+/share-traces            # show current visibility + dataset URL
+/share-traces public     # publish (anyone can view)
+/share-traces private    # lock it back down
+```
+You can also flip visibility from the dataset page on huggingface.co — the
+agent honours whatever you set there for subsequent uploads.
+To opt out entirely, set in your CLI config (e.g. `configs/cli_agent_config.json`
+or `~/.config/ml-intern/cli_agent_config.json`):
+```json
+{ "share_traces": false }
+```
+To override the destination repo, set:
+```json
+{ "personal_trace_repo_template": "{hf_user}/my-custom-traces" }
+```
+The shared `smolagents/ml-intern-sessions` dataset is unrelated and only
+receives anonymized telemetry rows used by the backend KPI scheduler.
 ## Supported Gateways
 ML Intern currently supports one-way notification gateways from CLI sessions.

agent/config.py CHANGED Viewed

@@ -27,6 +27,13 @@ class Config(BaseModel):
     mcpServers: dict[str, MCPServerConfig] = {}
     save_sessions: bool = True
     session_dataset_repo: str = "smolagents/ml-intern-sessions"
     auto_save_interval: int = 1  # Save every N user turns (0 = disabled)
     # Mid-turn heartbeat: save + upload every N seconds while events are being
     # emitted. Guards against losing trace data on long-running turns that

     mcpServers: dict[str, MCPServerConfig] = {}
     save_sessions: bool = True
     session_dataset_repo: str = "smolagents/ml-intern-sessions"
+    # Per-user private dataset that mirrors each session in Claude Code JSONL
+    # format so the HF Agent Trace Viewer auto-renders it
+    # (https://huggingface.co/changelog/agent-trace-viewer). Created private
+    # on first use; user flips it public via /share-traces. ``{hf_user}`` is
+    # substituted at upload time from ``Session.user_id``.
+    share_traces: bool = True
+    personal_trace_repo_template: str = "{hf_user}/ml-intern-sessions"
     auto_save_interval: int = 1  # Save every N user turns (0 = disabled)
     # Mid-turn heartbeat: save + upload every N seconds while events are being
     # emitted. Guards against losing trace data on long-running turns that

agent/context_manager/manager.py CHANGED Viewed

@@ -240,6 +240,8 @@ class ContextManager:
         """Add a message to the history"""
         if token_count:
             self.running_context_usage = token_count
         self.items.append(message)
         if self.on_message_added:
             self.on_message_added(message)
@@ -312,6 +314,7 @@ class ContextManager:
                             content="Tool was not executed (interrupted or error).",
                             tool_call_id=tc.id,
                             name=tc.function.name,
                         )
                     )
@@ -424,7 +427,11 @@ class ContextManager:
             session=session,
             kind="compaction",
         )
-        summarized_message = Message(role="assistant", content=summary)
         # Reconstruct: system + first user msg + summary + recent messages
         head = [system_msg] if system_msg else []

         """Add a message to the history"""
         if token_count:
             self.running_context_usage = token_count
+        if not getattr(message, "timestamp", None):
+            message.timestamp = datetime.now().isoformat()
         self.items.append(message)
         if self.on_message_added:
             self.on_message_added(message)
                             content="Tool was not executed (interrupted or error).",
                             tool_call_id=tc.id,
                             name=tc.function.name,
+                            timestamp=datetime.now().isoformat(),
                         )
                     )
             session=session,
             kind="compaction",
         )
+        summarized_message = Message(
+            role="assistant",
+            content=summary,
+            timestamp=datetime.now().isoformat(),
+        )
         # Reconstruct: system + first user msg + summary + recent messages
         head = [system_msg] if system_msg else []

agent/core/agent_loop.py CHANGED Viewed

@@ -1579,10 +1579,14 @@ async def submission_loop(
         session_holder[0] = session
     logger.info("Agent loop started")
-    # Retry any failed uploads from previous sessions (fire-and-forget)
     if config and config.save_sessions:
         Session.retry_failed_uploads_detached(
-            directory="session_logs", repo_id=config.session_dataset_repo
         )
     try:

         session_holder[0] = session
     logger.info("Agent loop started")
+    # Retry any failed uploads from previous sessions (fire-and-forget).
+    # Includes the personal trace repo when enabled so a session that failed
+    # to publish to the user's HF dataset gets a fresh attempt on next run.
     if config and config.save_sessions:
         Session.retry_failed_uploads_detached(
+            directory="session_logs",
+            repo_id=config.session_dataset_repo,
+            personal_repo_id=session._personal_trace_repo_id(),
         )
     try:

agent/core/session.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import asyncio
 import json
 import logging
 import subprocess
 import sys
 import uuid
@@ -456,62 +457,173 @@ class Session:
             logger.error(f"Failed to update local save status: {e}")
             return False
-    def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
-        """
-        Save session locally and spawn detached subprocess for upload (fire-and-forget)
-        Args:
-            repo_id: HuggingFace dataset repo ID
-        Returns:
-            Path to local save file
         """
-        # Save locally first (fast, synchronous)
-        local_path = self.save_trajectory_local(upload_status="pending")
-        if not local_path:
             return None
-        # Spawn detached subprocess for upload (fire-and-forget)
         try:
             uploader_script = Path(__file__).parent / "session_uploader.py"
-            # Use Popen with detached process
             subprocess.Popen(
-                [sys.executable, str(uploader_script), "upload", local_path, repo_id],
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
             logger.warning(f"Failed to spawn upload subprocess: {e}")
         return local_path
     @staticmethod
     def retry_failed_uploads_detached(
-        directory: str = "session_logs", repo_id: Optional[str] = None
     ) -> None:
         """
-        Spawn detached subprocess to retry failed/pending uploads (fire-and-forget)
         Args:
             directory: Directory containing session logs
-            repo_id: Target dataset repo ID
         """
-        if not repo_id:
             return
         try:
             uploader_script = Path(__file__).parent / "session_uploader.py"
-            # Spawn detached subprocess for retry
-            subprocess.Popen(
-                [sys.executable, str(uploader_script), "retry", directory, repo_id],
-                stdin=subprocess.DEVNULL,
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-                start_new_session=True,  # Detach from parent
-            )
         except Exception as e:
             logger.warning(f"Failed to spawn retry subprocess: {e}")

 import asyncio
 import json
 import logging
+import os
 import subprocess
 import sys
 import uuid
             logger.error(f"Failed to update local save status: {e}")
             return False
+    def _personal_trace_repo_id(self) -> Optional[str]:
+        """Resolve the per-user trace repo id from config + user_id.
+        Returns ``None`` when sharing is disabled, the user is anonymous,
+        or the template is missing — caller skips the personal upload in
+        those cases.
         """
+        if not getattr(self.config, "share_traces", False):
+            return None
+        if not self.user_id:
+            return None
+        template = getattr(self.config, "personal_trace_repo_template", None)
+        if not template:
+            return None
+        try:
+            return template.format(hf_user=self.user_id)
+        except (KeyError, IndexError):
+            logger.debug("personal_trace_repo_template format failed: %r", template)
             return None
+    def _spawn_uploader(
+        self,
+        action: str,
+        target: str,
+        repo_id: str,
+        *,
+        format: str,
+        token_env: Optional[str],
+        private: bool,
+        token_value: Optional[str] = None,
+    ) -> None:
+        """Fire-and-forget spawn of ``session_uploader.py`` with the given args."""
         try:
             uploader_script = Path(__file__).parent / "session_uploader.py"
+            cmd = [
+                sys.executable,
+                str(uploader_script),
+                action,
+                target,
+                repo_id,
+                "--format",
+                format,
+                "--private",
+                "true" if private else "false",
+            ]
+            if token_env:
+                cmd.extend(["--token-env", token_env])
+            env = os.environ.copy()
+            if token_value:
+                env["_ML_INTERN_PERSONAL_TOKEN"] = token_value
             subprocess.Popen(
+                cmd,
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
+                env=env,
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
             logger.warning(f"Failed to spawn upload subprocess: {e}")
+    def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
+        """
+        Save session locally and spawn detached subprocess(es) for upload
+        (fire-and-forget).
+        Always uploads to the shared org dataset (``repo_id``) in the
+        single-row format used by the KPI scheduler. When
+        ``config.share_traces`` is enabled and a username is known, also
+        uploads to the user's personal private dataset in Claude Code JSONL
+        format so the HF Agent Trace Viewer auto-renders it.
+        Args:
+            repo_id: HuggingFace dataset repo ID for the org/KPI upload.
+        Returns:
+            Path to local save file
+        """
+        local_path = self.save_trajectory_local(upload_status="pending")
+        if not local_path:
+            return None
+        self._spawn_uploader(
+            "upload",
+            local_path,
+            repo_id,
+            format="row",
+            token_env=None,  # default org token chain
+            private=False,
+        )
+        personal_repo = self._personal_trace_repo_id()
+        if personal_repo:
+            # User's own HF_TOKEN write-scoped to their namespace.
+            self._spawn_uploader(
+                "upload",
+                local_path,
+                personal_repo,
+                format="claude_code",
+                token_env="HF_TOKEN",
+                token_value=self.hf_token,
+                private=True,
+            )
         return local_path
     @staticmethod
     def retry_failed_uploads_detached(
+        directory: str = "session_logs",
+        repo_id: Optional[str] = None,
+        *,
+        personal_repo_id: Optional[str] = None,
     ) -> None:
         """
+        Spawn detached subprocess(es) to retry failed/pending uploads
+        (fire-and-forget).
         Args:
             directory: Directory containing session logs
+            repo_id: Target dataset repo ID for the shared org/KPI upload.
+            personal_repo_id: Per-user dataset for Claude-Code-format
+                retries. ``None`` skips the personal retry pass.
         """
+        if not repo_id and not personal_repo_id:
             return
         try:
             uploader_script = Path(__file__).parent / "session_uploader.py"
+            if repo_id:
+                subprocess.Popen(
+                    [
+                        sys.executable,
+                        str(uploader_script),
+                        "retry",
+                        directory,
+                        repo_id,
+                        "--format",
+                        "row",
+                    ],
+                    stdin=subprocess.DEVNULL,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    start_new_session=True,
+                )
+            if personal_repo_id:
+                subprocess.Popen(
+                    [
+                        sys.executable,
+                        str(uploader_script),
+                        "retry",
+                        directory,
+                        personal_repo_id,
+                        "--format",
+                        "claude_code",
+                        "--token-env",
+                        "HF_TOKEN",
+                        "--private",
+                        "true",
+                    ],
+                    stdin=subprocess.DEVNULL,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    start_new_session=True,
+                )
         except Exception as e:
             logger.warning(f"Failed to spawn retry subprocess: {e}")

agent/core/session_uploader.py CHANGED Viewed

@@ -3,39 +3,455 @@
 Standalone script for uploading session trajectories to HuggingFace.
 This runs as a separate process to avoid blocking the main agent.
 Uses individual file uploads to avoid race conditions.
 """
 import json
 import os
 import sys
 from datetime import datetime
 from pathlib import Path
 from dotenv import load_dotenv
 load_dotenv()
-# Token for session uploads. Fallback chain (least-privilege first) — matches
-# backend/kpis_scheduler.py so one write-scoped token on the Space covers every
-# telemetry dataset. Never hardcode tokens in source.
-_SESSION_TOKEN = (
-    os.environ.get("HF_SESSION_UPLOAD_TOKEN")
-    or os.environ.get("HF_TOKEN")
-    or os.environ.get("HF_ADMIN_TOKEN")
-    or ""
 )
 def upload_session_as_file(
-    session_file: str, repo_id: str, max_retries: int = 3
 ) -> bool:
-    """
-    Upload a single session as an individual JSONL file (no race conditions)
     Args:
         session_file: Path to local session JSON file
         repo_id: HuggingFace dataset repo ID
         max_retries: Number of retry attempts
     Returns:
         True if successful, False otherwise
@@ -46,96 +462,60 @@ def upload_session_as_file(
         print("Error: huggingface_hub library not available", file=sys.stderr)
         return False
     try:
-        # Load session data
-        with open(session_file, "r") as f:
-            data = json.load(f)
-        # Check if already uploaded
-        upload_status = data.get("upload_status")
-        if upload_status == "success":
             return True
-        # Use dedicated session upload token (write-only access to session dataset)
-        hf_token = _SESSION_TOKEN
         if not hf_token:
-            # Update status to failed
-            data["upload_status"] = "failed"
-            with open(session_file, "w") as f:
-                json.dump(data, f, indent=2)
             return False
-        # Scrub secrets (HF tokens, API keys, etc.) from messages + events
-        # before they leave the local disk. Best-effort regex-based redaction —
-        # see agent/core/redact.py for the patterns covered.
-        try:
-            from agent.core.redact import scrub  # type: ignore
-        except Exception:
-            # Fallback for environments where the agent package isn't importable
-            # (shouldn't happen in our subprocess, but be defensive).
-            import importlib.util
-            _spec = importlib.util.spec_from_file_location(
-                "_redact",
-                Path(__file__).parent / "redact.py",
-            )
-            _mod = importlib.util.module_from_spec(_spec)
-            _spec.loader.exec_module(_mod)  # type: ignore
-            scrub = _mod.scrub
-        scrubbed_messages = scrub(data["messages"])
-        scrubbed_events = scrub(data["events"])
-        scrubbed_tools = scrub(data.get("tools") or [])
-        # Prepare JSONL content (single line)
-        # Store messages/events/tools as JSON strings to avoid schema conflicts
-        # across sessions with different tool rosters.
-        session_row = {
-            "session_id": data["session_id"],
-            "user_id": data.get("user_id"),
-            "session_start_time": data["session_start_time"],
-            "session_end_time": data["session_end_time"],
-            "model_name": data["model_name"],
-            "total_cost_usd": data.get("total_cost_usd"),
-            "messages": json.dumps(scrubbed_messages),
-            "events": json.dumps(scrubbed_events),
-            "tools": json.dumps(scrubbed_tools),
-        }
-        # Create temporary JSONL file
         import tempfile
         with tempfile.NamedTemporaryFile(
             mode="w", suffix=".jsonl", delete=False
         ) as tmp:
-            json.dump(session_row, tmp)  # Single line JSON
             tmp_path = tmp.name
         try:
-            # Generate unique path in repo: sessions/YYYY-MM-DD/session_id.jsonl
             session_id = data["session_id"]
             date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
                 "%Y-%m-%d"
             )
             repo_path = f"sessions/{date_str}/{session_id}.jsonl"
-            # Upload with retries
             api = HfApi()
             for attempt in range(max_retries):
                 try:
-                    # Try to create repo if it doesn't exist (idempotent)
                     try:
                         api.create_repo(
                             repo_id=repo_id,
                             repo_type="dataset",
-                            private=False,
                             token=hf_token,
-                            exist_ok=True,  # Don't fail if already exists
                         )
                     except Exception:
-                        # Repo might already exist, continue
                         pass
-                    # Upload the session file
                     api.upload_file(
                         path_or_fileobj=tmp_path,
                         path_in_repo=repo_path,
@@ -145,12 +525,13 @@ def upload_session_as_file(
                         commit_message=f"Add session {session_id}",
                     )
-                    # Update local status to success
-                    data["upload_status"] = "success"
-                    data["upload_url"] = f"https://huggingface.co/datasets/{repo_id}"
-                    with open(session_file, "w") as f:
-                        json.dump(data, f, indent=2)
                     return True
                 except Exception:
@@ -160,14 +541,12 @@ def upload_session_as_file(
                         wait_time = 2**attempt
                         time.sleep(wait_time)
                     else:
-                        # Final attempt failed
-                        data["upload_status"] = "failed"
-                        with open(session_file, "w") as f:
-                            json.dump(data, f, indent=2)
                         return False
         finally:
-            # Clean up temp file
             try:
                 os.unlink(tmp_path)
             except Exception:
@@ -178,56 +557,102 @@ def upload_session_as_file(
         return False
-def retry_failed_uploads(directory: str, repo_id: str):
-    """Retry all failed/pending uploads in a directory"""
     log_dir = Path(directory)
     if not log_dir.exists():
         return
     session_files = list(log_dir.glob("session_*.json"))
     for filepath in session_files:
         try:
-            with open(filepath, "r") as f:
-                data = json.load(f)
-            upload_status = data.get("upload_status", "unknown")
-            # Only retry pending or failed uploads
-            if upload_status in ["pending", "failed"]:
-                upload_session_as_file(str(filepath), repo_id)
         except Exception:
             pass
 if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print("Usage: session_uploader.py <command> <args...>")
-        sys.exit(1)
-    command = sys.argv[1]
-    if command == "upload":
-        # python session_uploader.py upload <session_file> <repo_id>
-        if len(sys.argv) < 4:
-            print("Usage: session_uploader.py upload <session_file> <repo_id>")
-            sys.exit(1)
-        session_file = sys.argv[2]
-        repo_id = sys.argv[3]
-        success = upload_session_as_file(session_file, repo_id)
-        sys.exit(0 if success else 1)
-    elif command == "retry":
-        # python session_uploader.py retry <directory> <repo_id>
-        if len(sys.argv) < 4:
-            print("Usage: session_uploader.py retry <directory> <repo_id>")
-            sys.exit(1)
-        directory = sys.argv[2]
-        repo_id = sys.argv[3]
-        retry_failed_uploads(directory, repo_id)
         sys.exit(0)
-    else:
-        print(f"Unknown command: {command}")
-        sys.exit(1)

 Standalone script for uploading session trajectories to HuggingFace.
 This runs as a separate process to avoid blocking the main agent.
 Uses individual file uploads to avoid race conditions.
+Two formats are supported:
+* ``row`` — single-line JSONL row used by the existing org telemetry/KPI
+  pipeline (``smolagents/ml-intern-sessions``). Compatible with
+  ``backend/kpis_scheduler.py``.
+* ``claude_code`` — one event per line in the Claude Code JSONL schema,
+  auto-detected by the HF Agent Trace Viewer
+  (https://huggingface.co/changelog/agent-trace-viewer). Used for the
+  per-user private dataset (default ``{hf_user}/ml-intern-sessions``).
 """
+import argparse
+import hashlib
 import json
 import os
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import Any
 from dotenv import load_dotenv
 load_dotenv()
+# Token resolution for the org KPI dataset. Fallback chain (least-privilege
+# first) — matches backend/kpis_scheduler.py so one write-scoped token on the
+# Space covers every telemetry dataset. Never hardcode tokens in source.
+_ORG_TOKEN_FALLBACK_CHAIN = (
+    "HF_SESSION_UPLOAD_TOKEN",
+    "HF_TOKEN",
+    "HF_ADMIN_TOKEN",
 )
+_PERSONAL_TOKEN_ENV = "_ML_INTERN_PERSONAL_TOKEN"
+def _resolve_token(token_env: str | None) -> str:
+    """Resolve an HF token from env. ``token_env`` overrides the fallback chain."""
+    if token_env == "HF_TOKEN":
+        try:
+            from agent.core.hf_tokens import resolve_hf_token
+            return (
+                resolve_hf_token(
+                    os.environ.get(_PERSONAL_TOKEN_ENV),
+                    os.environ.get("HF_TOKEN"),
+                )
+                or ""
+            )
+        except Exception:
+            token = os.environ.get(_PERSONAL_TOKEN_ENV) or os.environ.get("HF_TOKEN")
+            return token or ""
+    if token_env:
+        return os.environ.get(token_env, "") or ""
+    for var in _ORG_TOKEN_FALLBACK_CHAIN:
+        val = os.environ.get(var)
+        if val:
+            return val
+    return ""
+def _scrub(obj: Any) -> Any:
+    """Best-effort regex scrub for HF tokens / API keys before upload."""
+    try:
+        from agent.core.redact import scrub  # type: ignore
+    except Exception:
+        # Fallback for environments where the agent package isn't importable
+        # (shouldn't happen in our subprocess, but be defensive).
+        import importlib.util
+        _spec = importlib.util.spec_from_file_location(
+            "_redact",
+            Path(__file__).parent / "redact.py",
+        )
+        _mod = importlib.util.module_from_spec(_spec)
+        _spec.loader.exec_module(_mod)  # type: ignore
+        scrub = _mod.scrub
+    return scrub(obj)
+def _msg_uuid(session_id: str, role: str, idx: int) -> str:
+    """Deterministic UUID-shaped id for a Claude Code message.
+    Uses sha1 of ``session_id::role::idx`` so re-uploads/heartbeats keep the
+    parent/child chain stable. Same convention as the example dataset
+    https://huggingface.co/datasets/clem/hf-coding-tools-traces.
+    """
+    digest = hashlib.sha1(f"{session_id}::{role}::{idx}".encode("utf-8")).hexdigest()
+    # Format like a UUID for visual familiarity (32 hex chars w/ dashes).
+    return (
+        f"{digest[0:8]}-{digest[8:12]}-{digest[12:16]}-"
+        f"{digest[16:20]}-{digest[20:32]}"
+    )
+def _content_to_text(content: Any) -> str:
+    """Best-effort flatten of a litellm/openai content field to plain text."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for block in content:
+            if isinstance(block, dict):
+                text = block.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+                else:
+                    # Unknown content block — keep round-trippable representation.
+                    parts.append(json.dumps(block, default=str))
+            else:
+                parts.append(str(block))
+        return "\n".join(parts)
+    return str(content)
+def _parse_tool_args(raw: Any) -> Any:
+    """Tool call arguments arrive as a JSON-encoded string from LLMs."""
+    if isinstance(raw, dict):
+        return raw
+    if isinstance(raw, str):
+        try:
+            return json.loads(raw)
+        except (json.JSONDecodeError, TypeError):
+            return {"_raw": raw}
+    return raw
+def to_claude_code_jsonl(trajectory: dict) -> list[dict]:
+    """Convert an internal trajectory dict to Claude Code JSONL events.
+    Schema reference (per the HF Agent Trace Viewer auto-detector):
+        {"type":"user","message":{"role":"user","content":"..."},
+         "uuid":"...","parentUuid":null,"sessionId":"...","timestamp":"..."}
+        {"type":"assistant",
+         "message":{"role":"assistant","model":"...",
+                     "content":[{"type":"text","text":"..."},
+                                {"type":"tool_use","id":"...","name":"...","input":{...}}]},
+         "uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
+        {"type":"user","message":{"role":"user",
+                                  "content":[{"type":"tool_result",
+                                              "tool_use_id":"...","content":"..."}]},
+         "uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
+    System messages are skipped (they're not part of the viewer schema and
+    contain large prompts that pollute the trace viewer UI).
+    """
+    session_id = trajectory["session_id"]
+    model_name = trajectory.get("model_name") or ""
+    fallback_timestamp = (
+        trajectory.get("session_start_time") or datetime.now().isoformat()
+    )
+    messages: list[dict] = trajectory.get("messages") or []
+    out: list[dict] = []
+    parent_uuid: str | None = None
+    for idx, msg in enumerate(messages):
+        if not isinstance(msg, dict):
+            continue
+        role = msg.get("role")
+        if role == "system":
+            continue
+        timestamp = msg.get("timestamp") or fallback_timestamp
+        if role == "user":
+            content = _content_to_text(msg.get("content"))
+            event_uuid = _msg_uuid(session_id, "user", idx)
+            out.append(
+                {
+                    "type": "user",
+                    "message": {"role": "user", "content": content},
+                    "uuid": event_uuid,
+                    "parentUuid": parent_uuid,
+                    "sessionId": session_id,
+                    "timestamp": timestamp,
+                }
+            )
+            parent_uuid = event_uuid
+        elif role == "assistant":
+            content_text = _content_to_text(msg.get("content"))
+            content_blocks: list[dict] = []
+            if content_text:
+                content_blocks.append({"type": "text", "text": content_text})
+            for tc in msg.get("tool_calls") or []:
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") or {}
+                content_blocks.append(
+                    {
+                        "type": "tool_use",
+                        "id": tc.get("id") or "",
+                        "name": fn.get("name") or "",
+                        "input": _parse_tool_args(fn.get("arguments")),
+                    }
+                )
+            if not content_blocks:
+                # Edge case: empty assistant turn (shouldn't normally happen,
+                # but skip rather than emit an empty content array which
+                # confuses the viewer).
+                continue
+            event_uuid = _msg_uuid(session_id, "assistant", idx)
+            out.append(
+                {
+                    "type": "assistant",
+                    "message": {
+                        "role": "assistant",
+                        "model": model_name,
+                        "content": content_blocks,
+                    },
+                    "uuid": event_uuid,
+                    "parentUuid": parent_uuid,
+                    "sessionId": session_id,
+                    "timestamp": timestamp,
+                }
+            )
+            parent_uuid = event_uuid
+        elif role == "tool":
+            tool_call_id = msg.get("tool_call_id") or ""
+            content_text = _content_to_text(msg.get("content"))
+            event_uuid = _msg_uuid(session_id, "tool", idx)
+            out.append(
+                {
+                    "type": "user",
+                    "message": {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "tool_result",
+                                "tool_use_id": tool_call_id,
+                                "content": content_text,
+                            }
+                        ],
+                    },
+                    "uuid": event_uuid,
+                    "parentUuid": parent_uuid,
+                    "sessionId": session_id,
+                    "timestamp": timestamp,
+                }
+            )
+            parent_uuid = event_uuid
+    return out
def _scrub_session_for_upload(data: dict) -> dict:
    """Return a shallow copy of ``data`` with its transcript fields scrubbed.

    ``messages``, ``events`` and ``tools`` are each passed through ``_scrub``;
    a missing or falsy value is scrubbed as an empty list. Every other key is
    copied over as-is, and no key on ``data`` itself is rebound.
    """
    cleaned = dict(data)
    for field in ("messages", "events", "tools"):
        cleaned[field] = _scrub(data.get(field) or [])
    return cleaned
def _write_row_payload(data: dict, tmp_path: str) -> None:
    """Write the session to ``tmp_path`` as a single JSON row.

    This is the pre-existing single-row JSONL format (used by the KPI
    scheduler). ``messages``, ``events`` and ``tools`` are scrubbed first and
    then stored as JSON-encoded strings inside the row.

    Raises:
        KeyError: if ``session_id``, ``session_start_time``,
            ``session_end_time`` or ``model_name`` is missing from ``data``.
    """
    cleaned = _scrub_session_for_upload(data)
    row = {
        "session_id": data["session_id"],
        "user_id": data.get("user_id"),
        "session_start_time": data["session_start_time"],
        "session_end_time": data["session_end_time"],
        "model_name": data["model_name"],
        "total_cost_usd": data.get("total_cost_usd"),
    }
    # The three transcript fields are nested JSON strings, not nested objects.
    for field in ("messages", "events", "tools"):
        row[field] = json.dumps(cleaned[field])
    with open(tmp_path, "w") as tmp:
        tmp.write(json.dumps(row))
def _write_claude_code_payload(data: dict, tmp_path: str) -> None:
    """Write the session to ``tmp_path`` as Claude Code style JSONL.

    One JSON object is written per line, in the order returned by
    ``to_claude_code_jsonl``. This is the format uploaded for the HF trace
    viewer.
    """
    # Scrubbing happens before conversion so that unscrubbed text is never
    # written to the upload temp file.
    cleaned = _scrub_session_for_upload(data)
    events = to_claude_code_jsonl(cleaned)
    with open(tmp_path, "w") as tmp:
        tmp.writelines(json.dumps(event) + "\n" for event in events)
def _status_field(format: str) -> str:
    """Return the upload-status key used for ``format`` in the session file.

    ``claude_code`` uploads are tracked under ``personal_upload_status``;
    any other format uses ``upload_status``.
    """
    if format == "claude_code":
        return "personal_upload_status"
    return "upload_status"
def _url_field(format: str) -> str:
    """Return the upload-URL key used for ``format`` in the session file.

    ``claude_code`` uploads are tracked under ``personal_upload_url``;
    any other format uses ``upload_url``.
    """
    if format == "claude_code":
        return "personal_upload_url"
    return "upload_url"
def _read_session_file(session_file: str) -> dict:
    """Load a local session JSON file while holding a shared ``flock``.

    The shared lock is taken before parsing and released afterwards, even if
    parsing fails.
    """
    import fcntl

    with open(session_file, "r") as handle:
        fcntl.flock(handle, fcntl.LOCK_SH)
        try:
            payload = json.load(handle)
        finally:
            fcntl.flock(handle, fcntl.LOCK_UN)
    return payload
+def _update_upload_status(
+    session_file: str,
+    status_key: str,
+    url_key: str,
+    status: str,
+    dataset_url: str | None = None,
+) -> None:
+    """Atomically update only this uploader's status fields.
+    The org and personal uploaders run as separate processes against the same
+    local session JSON file. Re-read under an exclusive lock so one uploader
+    cannot clobber fields written by the other.
+    """
+    import fcntl
+    with open(session_file, "r+") as f:
+        fcntl.flock(f, fcntl.LOCK_EX)
+        try:
+            data = json.load(f)
+            data[status_key] = status
+            if dataset_url is not None:
+                data[url_key] = dataset_url
+            data["last_save_time"] = datetime.now().isoformat()
+            f.seek(0)
+            json.dump(data, f, indent=2)
+            f.truncate()
+            f.flush()
+            os.fsync(f.fileno())
+        finally:
+            fcntl.flock(f, fcntl.LOCK_UN)
def dataset_card_readme(repo_id: str) -> str:
    """Return the dataset card (README.md) for a personal session trace repo.

    The card is a static document: YAML front matter followed by Markdown.

    Args:
        repo_id: Target dataset repo. It is currently not interpolated into
            the card, so every repo gets the same text; the parameter is kept
            so existing callers keep working.

    Returns:
        The full README text, ending with a newline.
    """
    # Plain string on purpose: the card contains no placeholders, so an
    # f-string prefix would be misleading.
    return """---
pretty_name: "ML Intern Session Traces"
language:
- en
license: other
task_categories:
- text-generation
tags:
- agent-traces
- coding-agent
- ml-intern
- session-traces
- claude-code
- hf-agent-trace-viewer
configs:
- config_name: default
  data_files:
  - split: train
    path: "sessions/**/*.jsonl"
---
# ML Intern session traces
This dataset contains ML Intern coding agent session traces uploaded from local
ML Intern runs. The traces are stored as JSON Lines files under `sessions/`,
with one file per session.
## Links
- ML Intern demo: https://smolagents-ml-intern.hf.space
- ML Intern CLI: https://github.com/huggingface/ml-intern
## Data description
Each `*.jsonl` file contains a single ML Intern session converted to a
Claude-Code-style event stream for the Hugging Face Agent Trace Viewer. Entries
can include user messages, assistant messages, tool calls, tool results, model
metadata, and timestamps.
Session files are written to paths of the form:
```text
sessions/YYYY-MM-DD/<session_id>.jsonl
```
## Redaction and review
**WARNING: no comprehensive redaction or human review has been performed for this dataset.**
ML Intern applies automated best-effort scrubbing for common secret patterns
such as Hugging Face, Anthropic, OpenAI, GitHub, and AWS tokens before upload.
This is not a privacy guarantee.
These traces may contain sensitive information, including prompts, code,
terminal output, file paths, repository names, private task context, tool
outputs, or other data from the local development environment. Treat every
session as potentially sensitive.
Do not make this dataset public unless you have manually inspected the uploaded
sessions and are comfortable sharing their full contents.
## Limitations
Coding agent transcripts can include private or off-topic content, failed
experiments, credentials accidentally pasted by a user, and outputs copied from
local files or services. Use with appropriate caution, especially before
changing repository visibility.
"""
def _upload_dataset_card(api: Any, repo_id: str, token: str, format: str) -> None:
    """Upload ``README.md`` for personal trace datasets.

    Only the ``claude_code`` format gets a dataset card; for any other format
    this is a no-op and ``api`` is not called.
    """
    if format == "claude_code":
        card_bytes = dataset_card_readme(repo_id).encode("utf-8")
        api.upload_file(
            path_or_fileobj=card_bytes,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="Update dataset card",
        )
 def upload_session_as_file(
+    session_file: str,
+    repo_id: str,
+    max_retries: int = 3,
+    format: str = "row",
+    token_env: str | None = None,
+    private: bool = False,
 ) -> bool:
+    """Upload a single session as an individual JSONL file (no race conditions).
     Args:
         session_file: Path to local session JSON file
         repo_id: HuggingFace dataset repo ID
         max_retries: Number of retry attempts
+        format: ``row`` (default, KPI-compatible) or ``claude_code`` (HF
+            Agent Trace Viewer compatible).
+        token_env: Name of the env var holding the HF token. ``None`` falls
+            back to the org-token chain (``HF_SESSION_UPLOAD_TOKEN`` →
+            ``HF_TOKEN`` → ``HF_ADMIN_TOKEN``).
+        private: When creating the repo for the first time, mark it private.
     Returns:
         True if successful, False otherwise
         print("Error: huggingface_hub library not available", file=sys.stderr)
         return False
+    status_key = _status_field(format)
+    url_key = _url_field(format)
     try:
+        data = _read_session_file(session_file)
+        # Skip if already uploaded for this format.
+        if data.get(status_key) == "success":
             return True
+        hf_token = _resolve_token(token_env)
         if not hf_token:
+            _update_upload_status(session_file, status_key, url_key, "failed")
             return False
+        # Build temp upload payload in the requested format.
         import tempfile
         with tempfile.NamedTemporaryFile(
             mode="w", suffix=".jsonl", delete=False
         ) as tmp:
             tmp_path = tmp.name
         try:
+            if format == "claude_code":
+                _write_claude_code_payload(data, tmp_path)
+            else:
+                _write_row_payload(data, tmp_path)
             session_id = data["session_id"]
             date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
                 "%Y-%m-%d"
             )
             repo_path = f"sessions/{date_str}/{session_id}.jsonl"
             api = HfApi()
             for attempt in range(max_retries):
                 try:
+                    # Idempotent create — visibility is set on first creation
+                    # only. Existing repos keep whatever the user picked via
+                    # /share-traces.
                     try:
                         api.create_repo(
                             repo_id=repo_id,
                             repo_type="dataset",
+                            private=private,
                             token=hf_token,
+                            exist_ok=True,
                         )
                     except Exception:
                         pass
+                    _upload_dataset_card(api, repo_id, hf_token, format)
                     api.upload_file(
                         path_or_fileobj=tmp_path,
                         path_in_repo=repo_path,
                         commit_message=f"Add session {session_id}",
                     )
+                    _update_upload_status(
+                        session_file,
+                        status_key,
+                        url_key,
+                        "success",
+                        f"https://huggingface.co/datasets/{repo_id}",
+                    )
                     return True
                 except Exception:
                         wait_time = 2**attempt
                         time.sleep(wait_time)
                     else:
+                        _update_upload_status(
+                            session_file, status_key, url_key, "failed"
+                        )
                         return False
         finally:
             try:
                 os.unlink(tmp_path)
             except Exception:
         return False
def retry_failed_uploads(
    directory: str,
    repo_id: str,
    format: str = "row",
    token_env: str | None = None,
    private: bool = False,
):
    """Re-attempt pending/failed uploads for every session file in a directory.

    Scans ``directory`` for ``session_*.json`` files and calls
    ``upload_session_as_file`` for each one whose status for ``format`` is
    ``pending``, ``failed`` or absent. Returns silently if the directory does
    not exist. Errors on individual files are swallowed so one bad file does
    not stop the rest.
    """
    log_dir = Path(directory)
    if not log_dir.exists():
        return

    status_key = _status_field(format)
    retryable = ("pending", "failed", "unknown")

    for filepath in list(log_dir.glob("session_*.json")):
        try:
            data = _read_session_file(str(filepath))
            status = data.get(status_key, "unknown")

            # Files predating the status field don't carry it. For the row
            # format that is treated as "not yet attempted" (legacy
            # behaviour); for claude_code they are skipped so pre-existing
            # sessions are not suddenly re-uploaded to a newly introduced
            # personal repo.
            if format == "claude_code" and status_key not in data:
                continue
            if status not in retryable:
                continue

            upload_session_as_file(
                str(filepath),
                repo_id,
                format=format,
                token_env=token_env,
                private=private,
            )
        except Exception:
            pass
def _str2bool(v: str) -> bool:
    """Interpret a CLI flag value as a boolean.

    After stripping whitespace and lower-casing, ``1``, ``true``, ``yes`` and
    ``on`` are truthy; everything else is ``False``.
    """
    normalized = str(v).strip().lower()
    return normalized in ("1", "true", "yes", "on")
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog="session_uploader.py")
+    sub = parser.add_subparsers(dest="command", required=True)
+    p_upload = sub.add_parser("upload")
+    p_upload.add_argument("session_file")
+    p_upload.add_argument("repo_id")
+    p_upload.add_argument(
+        "--format",
+        choices=["row", "claude_code"],
+        default="row",
+    )
+    p_upload.add_argument(
+        "--token-env",
+        default=None,
+        help="Env var name holding the HF token (default: org fallback chain).",
+    )
+    p_upload.add_argument("--private", default="false")
+    p_retry = sub.add_parser("retry")
+    p_retry.add_argument("directory")
+    p_retry.add_argument("repo_id")
+    p_retry.add_argument(
+        "--format",
+        choices=["row", "claude_code"],
+        default="row",
+    )
+    p_retry.add_argument("--token-env", default=None)
+    p_retry.add_argument("--private", default="false")
+    args = parser.parse_args()
+    if args.command == "upload":
+        ok = upload_session_as_file(
+            args.session_file,
+            args.repo_id,
+            format=args.format,
+            token_env=args.token_env,
+            private=_str2bool(args.private),
+        )
+        sys.exit(0 if ok else 1)
+    if args.command == "retry":
+        retry_failed_uploads(
+            args.directory,
+            args.repo_id,
+            format=args.format,
+            token_env=args.token_env,
+            private=_str2bool(args.private),
+        )
         sys.exit(0)
+    parser.print_help()
+    sys.exit(1)

agent/main.py CHANGED Viewed

@@ -807,10 +807,120 @@ async def _handle_slash_command(
             print(f"Context items: {len(session.context_manager.items)}")
         return None
     print(f"Unknown command: {command}. Type /help for available commands.")
     return None
 async def main(model: str | None = None):
     """Interactive chat with the agent"""

             print(f"Context items: {len(session.context_manager.items)}")
         return None
+    if command == "/share-traces":
+        session = session_holder[0] if session_holder else None
+        await _handle_share_traces_command(arg, config, session)
+        return None
     print(f"Unknown command: {command}. Type /help for available commands.")
     return None
async def _handle_share_traces_command(arg: str, config, session) -> None:
    """Show or flip visibility of the user's personal trace dataset.

    Uses the user's own HF_TOKEN (write-scoped to their namespace). Only
    operates on the personal trace repo configured via
    ``personal_trace_repo_template`` — never touches the shared org dataset.

    Args:
        arg: Text after the command. Empty prints the current visibility;
            ``public`` or ``private`` (case-insensitive) creates the dataset
            if needed and applies that visibility; anything else is reported
            as an error.
        config: Agent config; only ``share_traces`` is read, to explain why
            no personal repo is available.
        session: Active session, or ``None``. Provides the personal repo id,
            ``user_id`` and ``hf_token``.

    All outcomes, including Hub errors, are printed to the console; nothing
    is raised to the caller for them.
    """
    from huggingface_hub import HfApi
    from huggingface_hub.utils import HfHubHTTPError

    console = get_console()
    if session is None:
        console.print("[bold red]No active session.[/bold red]")
        return

    # ``session`` cannot be None past the guard above, so call it directly.
    repo_id = session._personal_trace_repo_id()
    if not repo_id:
        # Explain the most specific reason why no repo id could be derived.
        if not getattr(config, "share_traces", False):
            console.print(
                "[yellow]share_traces is disabled in config. "
                "Set it to true to publish per-session traces to your HF dataset."
                "[/yellow]"
            )
            return
        if not session.user_id:
            console.print(
                "[yellow]No HF username resolved \u2014 cannot pick a personal "
                "trace repo. Set HF_TOKEN to a token tied to your account.[/yellow]"
            )
            return
        console.print(
            "[yellow]personal_trace_repo_template is unset \u2014 nothing to do.[/yellow]"
        )
        return

    token = session.hf_token or resolve_hf_token()
    if not token:
        console.print(
            "[bold red]No HF_TOKEN available.[/bold red] Cannot read or change "
            "dataset visibility."
        )
        return

    api = HfApi(token=token)
    url = f"https://huggingface.co/datasets/{repo_id}"
    target = arg.strip().lower()

    if not target:
        # No argument: report the current visibility without changing it.
        try:
            info = await asyncio.to_thread(
                api.repo_info, repo_id=repo_id, repo_type="dataset"
            )
            visibility = "private" if getattr(info, "private", False) else "public"
            console.print(f"[bold]Trace dataset:[/bold] {url}")
            console.print(f"[bold]Visibility:[/bold] {visibility}")
            console.print(
                "[dim]Use '/share-traces public' to publish, "
                "'/share-traces private' to lock it back down.[/dim]"
            )
        except HfHubHTTPError as e:
            if getattr(e.response, "status_code", None) == 404:
                console.print(
                    f"[dim]Dataset {repo_id} doesn't exist yet \u2014 it'll be "
                    "created (private) on the next session save.[/dim]"
                )
            else:
                console.print(f"[bold red]Hub error:[/bold red] {e}")
        except Exception as e:
            console.print(f"[bold red]Could not fetch dataset info:[/bold red] {e}")
        return

    if target not in {"public", "private"}:
        console.print(
            f"[bold red]Unknown argument:[/bold red] {target}. "
            "Expected 'public' or 'private'."
        )
        return

    private = target == "private"
    try:
        # Idempotent — create if missing so first-flip works even before any
        # session has been saved yet.
        await asyncio.to_thread(
            api.create_repo,
            repo_id=repo_id,
            repo_type="dataset",
            private=private,
            token=token,
            exist_ok=True,
        )
        # Then apply the requested visibility explicitly.
        await asyncio.to_thread(
            api.update_repo_settings,
            repo_id=repo_id,
            repo_type="dataset",
            private=private,
            token=token,
        )
    except Exception as e:
        console.print(f"[bold red]Failed to update visibility:[/bold red] {e}")
        return

    label = "PUBLIC" if not private else "private"
    console.print(f"[green]Dataset is now {label}.[/green] {url}")
 async def main(model: str | None = None):
     """Interactive chat with the agent"""

agent/utils/terminal_display.py CHANGED Viewed

@@ -425,6 +425,7 @@ HELP_TEXT = f"""\
 {_I}  [cyan]/effort[/cyan] [level]  Reasoning effort (minimal|low|medium|high|xhigh|max|off)
 {_I}  [cyan]/yolo[/cyan]            Toggle auto-approve mode
 {_I}  [cyan]/status[/cyan]          Current model & turn count
 {_I}  [cyan]/quit[/cyan]            Exit"""

 {_I}  [cyan]/effort[/cyan] [level]  Reasoning effort (minimal|low|medium|high|xhigh|max|off)
 {_I}  [cyan]/yolo[/cyan]            Toggle auto-approve mode
 {_I}  [cyan]/status[/cyan]          Current model & turn count
+{_I}  [cyan]/share-traces[/cyan] [public|private]  Show/flip visibility of your HF trace dataset
 {_I}  [cyan]/quit[/cyan]            Exit"""

backend/dependencies.py CHANGED Viewed

@@ -138,6 +138,38 @@ async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
     return user
 async def check_org_membership(token: str, org_name: str) -> bool:
     """Check if the token owner belongs to an HF org. Only caches positive results."""
     now = time.time()
@@ -170,10 +202,10 @@ async def get_current_user(request: Request) -> dict[str, Any]:
     1. Authorization: Bearer <token> header
     2. hf_access_token cookie
-    In dev mode (AUTH_ENABLED=False), returns a default dev user.
     """
     if not AUTH_ENABLED:
-        return DEV_USER
     # Try Authorization header
     token = bearer_token_from_header(request.headers.get("Authorization", ""))

     return user
async def _dev_user_from_env() -> dict[str, Any]:
    """Build the dev-mode user from ``HF_TOKEN`` when possible.

    Local dev often runs without OAuth, but session trace uploads still need a
    real HF namespace. Deriving the dev user from HF_TOKEN keeps local uploads
    pointed at the token owner's dataset instead of dev/ml-intern-sessions.

    Falls back to ``DEV_USER`` when the token is unset, the whoami lookup does
    not return a dict, or no non-empty string username is found in it.
    """
    token = os.environ.get("HF_TOKEN", "")
    if not token:
        return DEV_USER

    whoami = await fetch_whoami_v2(token)
    if not isinstance(whoami, dict):
        return DEV_USER

    # First non-empty string among the candidate keys, in priority order.
    candidates = (whoami.get(key) for key in ("name", "user", "preferred_username"))
    username = next(
        (value for value in candidates if isinstance(value, str) and value),
        None,
    )
    if not username:
        return DEV_USER

    plan = await _fetch_user_plan(token)
    return {
        "user_id": username,
        "username": username,
        "authenticated": True,
        "plan": plan,
    }
 async def check_org_membership(token: str, org_name: str) -> bool:
     """Check if the token owner belongs to an HF org. Only caches positive results."""
     now = time.time()
     1. Authorization: Bearer <token> header
     2. hf_access_token cookie
+    In dev mode (AUTH_ENABLED=False), uses HF_TOKEN as the user when possible.
     """
     if not AUTH_ENABLED:
+        return await _dev_user_from_env()
     # Try Authorization header
     token = bearer_token_from_header(request.headers.get("Authorization", ""))

configs/cli_agent_config.json CHANGED Viewed

@@ -2,6 +2,8 @@
   "model_name": "anthropic/claude-opus-4-6",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,

   "model_name": "anthropic/claude-opus-4-6",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
+  "share_traces": true,
+  "personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,

configs/frontend_agent_config.json CHANGED Viewed

@@ -2,6 +2,8 @@
   "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,

   "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
+  "share_traces": true,
+  "personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,

tests/unit/test_dangling_tool_calls.py CHANGED Viewed

@@ -28,6 +28,7 @@ def _make_cm() -> ContextManager:
     cm.running_context_usage = 0
     cm.untouched_messages = 5
     cm.items = [Message(role="system", content="system")]
     return cm
@@ -66,6 +67,15 @@ def test_no_orphan_means_no_stub():
     assert tool_msgs[0].content == "ok"
 def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
     cm = _make_cm()
     cm.items.extend([

     cm.running_context_usage = 0
     cm.untouched_messages = 5
     cm.items = [Message(role="system", content="system")]
+    cm.on_message_added = None
     return cm
     assert tool_msgs[0].content == "ok"
def test_add_message_records_message_timestamp():
    """``add_message`` leaves a truthy ``timestamp`` on the stored item."""
    cm = _make_cm()
    cm.add_message(Message(role="user", content="hello"))
    stored = cm.items[-1]
    assert getattr(stored, "timestamp", None)
 def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
     cm = _make_cm()
     cm.items.extend([

tests/unit/test_session_uploader.py ADDED Viewed

	@@ -0,0 +1,202 @@

+import json
+from agent.core.session_uploader import (
+    _PERSONAL_TOKEN_ENV,
+    _resolve_token,
+    _update_upload_status,
+    _upload_dataset_card,
+    _write_claude_code_payload,
+    _write_row_payload,
+    dataset_card_readme,
+    to_claude_code_jsonl,
+)
# Token-shaped fixtures for the scrubbing tests.
# NOTE(review): presumably assembled from parts so that no complete
# secret-looking literal appears in the source — confirm.
HF_SECRET = f"hf_{'a' * 30}"
ANTHROPIC_SECRET = f"sk-ant-{'b' * 24}"
GITHUB_SECRET = f"ghp_{'c' * 36}"
def test_dataset_card_readme_has_metadata_and_public_warning():
    """The card carries its YAML metadata, project links and privacy warning."""
    readme = dataset_card_readme("lewtun/ml-intern-sessions")
    required_snippets = (
        'pretty_name: "ML Intern Session Traces"',
        "task_categories:\n- text-generation",
        "- agent-traces",
        "- coding-agent",
        "- ml-intern",
        'path: "sessions/**/*.jsonl"',
        "ML Intern demo: https://smolagents-ml-intern.hf.space",
        "ML Intern CLI: https://github.com/huggingface/ml-intern",
        "**WARNING: no comprehensive redaction or human review has been performed for this dataset.**",
        "automated best-effort scrubbing",
        "Do not make this dataset public",
    )

    assert readme.startswith("---\n")
    for snippet in required_snippets:
        assert snippet in readme
    # The card must not embed a link to the dataset repository itself.
    assert "Repository: https://huggingface.co/datasets/" not in readme
def test_upload_dataset_card_only_for_claude_code_format():
    """Only the ``claude_code`` format triggers a README upload."""
    recorded = []

    class FakeApi:
        def upload_file(self, **kwargs):
            recorded.append(kwargs)

    api = FakeApi()
    repo = "lewtun/ml-intern-sessions"

    _upload_dataset_card(api, repo, "hf_token", "row")
    assert recorded == []

    _upload_dataset_card(api, repo, "hf_token", "claude_code")
    assert len(recorded) == 1
    call = recorded[0]
    assert call["path_in_repo"] == "README.md"
    assert call["repo_id"] == "lewtun/ml-intern-sessions"
    assert call["repo_type"] == "dataset"
    assert call["token"] == "hf_token"
    assert b"no comprehensive redaction or human review" in call["path_or_fileobj"]
def test_personal_token_env_takes_precedence_for_hf_token(monkeypatch):
    """With both variables set, asking for ``HF_TOKEN`` yields the personal token."""
    env = ((_PERSONAL_TOKEN_ENV, "personal-token"), ("HF_TOKEN", "env-token"))
    for name, value in env:
        monkeypatch.setenv(name, value)
    resolved = _resolve_token("HF_TOKEN")
    assert resolved == "personal-token"
def test_update_upload_status_preserves_other_uploader_fields(tmp_path):
    """Updating the personal status leaves the org uploader's fields intact."""
    org_url = "https://huggingface.co/datasets/org/sessions"
    personal_url = "https://huggingface.co/datasets/user/ml-intern-sessions"
    initial = {
        "session_id": "123",
        "upload_status": "success",
        "upload_url": org_url,
        "personal_upload_status": "pending",
    }
    session_file = tmp_path / "session_123.json"
    session_file.write_text(json.dumps(initial))

    _update_upload_status(
        str(session_file),
        "personal_upload_status",
        "personal_upload_url",
        "success",
        personal_url,
    )

    data = json.loads(session_file.read_text())
    assert data["upload_status"] == "success"
    assert data["upload_url"] == org_url
    assert data["personal_upload_status"] == "success"
    assert data["personal_upload_url"] == personal_url
def test_claude_code_jsonl_uses_message_timestamps():
    """Each converted event keeps the timestamp of its source message."""
    stamps = [
        "2026-01-01T00:00:01",
        "2026-01-01T00:00:02",
        "2026-01-01T00:00:03",
    ]
    messages = [
        {"role": "user", "content": "hello", "timestamp": stamps[0]},
        {"role": "assistant", "content": "hi", "timestamp": stamps[1]},
        {
            "role": "tool",
            "tool_call_id": "call-1",
            "content": "ok",
            "timestamp": stamps[2],
        },
    ]
    session = {
        "session_id": "session-123",
        "model_name": "anthropic/claude-opus-4-6",
        "session_start_time": "2026-01-01T00:00:00",
        "messages": messages,
    }

    events = to_claude_code_jsonl(session)

    assert [event["timestamp"] for event in events] == stamps
def test_row_payload_scrubs_messages_events_and_tools(tmp_path):
    """Secrets in messages, events and tools are redacted in the row payload."""
    tmp_file = tmp_path / "row.jsonl"
    data = {
        "session_id": "session-123",
        "user_id": "lewtun",
        "session_start_time": "2026-01-01T00:00:00",
        "session_end_time": "2026-01-01T00:00:03",
        "model_name": "anthropic/claude-opus-4-6",
        "total_cost_usd": 0.01,
        "messages": [{"role": "user", "content": f"token {HF_SECRET}"}],
        "events": [{"type": "debug", "content": f"key {ANTHROPIC_SECRET}"}],
        "tools": [{"name": "bash", "env": f"GITHUB_TOKEN={GITHUB_SECRET}"}],
    }

    _write_row_payload(data, str(tmp_file))
    payload = tmp_file.read_text()

    # None of the raw secrets may survive ...
    for secret in (HF_SECRET, ANTHROPIC_SECRET, GITHUB_SECRET):
        assert secret not in payload
    # ... and each one must have been replaced by its redaction marker.
    for marker in (
        "[REDACTED_HF_TOKEN]",
        "[REDACTED_ANTHROPIC_KEY]",
        "GITHUB_TOKEN=[REDACTED]",
    ):
        assert marker in payload
+def test_claude_code_payload_scrubs_messages_before_conversion(tmp_path):
+    tmp_file = tmp_path / "claude_code.jsonl"
+    data = {
+        "session_id": "session-123",
+        "model_name": "anthropic/claude-opus-4-6",
+        "session_start_time": "2026-01-01T00:00:00",
+        "messages": [
+            {
+                "role": "user",
+                "content": f"token {HF_SECRET}",
+                "timestamp": "2026-01-01T00:00:01",
+            },
+            {
+                "role": "assistant",
+                "content": "running tool",
+                "tool_calls": [
+                    {
+                        "id": "call-1",
+                        "function": {
+                            "name": "bash",
+                            "arguments": json.dumps({"key": ANTHROPIC_SECRET}),
+                        },
+                    }
+                ],
+                "timestamp": "2026-01-01T00:00:02",
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call-1",
+                "content": f"GITHUB_TOKEN={GITHUB_SECRET}",
+                "timestamp": "2026-01-01T00:00:03",
+            },
+        ],
+    }
+    _write_claude_code_payload(data, str(tmp_file))
+    payload = tmp_file.read_text()
+    assert HF_SECRET not in payload
+    assert ANTHROPIC_SECRET not in payload
+    assert GITHUB_SECRET not in payload
+    assert "[REDACTED_HF_TOKEN]" in payload
+    assert "[REDACTED_ANTHROPIC_KEY]" in payload
+    assert "GITHUB_TOKEN=[REDACTED]" in payload