Darshan Thakare lewtun HF Staff Codex commited on
Commit
0bd7547
·
unverified ·
1 Parent(s): 1b922dd

Auto-share session traces to a private HF dataset (#193)

Browse files

* feat: add share_traces toggle and per-user trace repo template

* feat: support Claude Code JSONL format and per-target auth

* feat: dual-upload sessions to private user trace dataset

* chore: retry personal trace uploads on booting

* feat: add /share-traces command to flip dataset visibility

* docs: document HF trace auto-share and /share-traces

* Use HF token owner for local dev auth

Co-authored-by: Codex <codex@openai.com>

* Rename personal session trace dataset

Co-authored-by: Codex <codex@openai.com>

* Add session dataset card metadata

Co-authored-by: Codex <codex@openai.com>

* Fix session trace upload review issues

Co-authored-by: OpenAI Codex <codex@openai.com>

* Preserve secret scrubbing before trace uploads

Co-authored-by: OpenAI Codex <codex@openai.com>

* Link ML Intern demo in dataset card

Co-authored-by: OpenAI Codex <codex@openai.com>

---------

Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>
Co-authored-by: Codex <codex@openai.com>

README.md CHANGED
@@ -56,6 +56,41 @@ ml-intern --max-iterations 100 "your prompt"
56
  ml-intern --no-stream "your prompt"
57
  ```
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  ## Supported Gateways
60
 
61
  ML Intern currently supports one-way notification gateways from CLI sessions.
 
56
  ml-intern --no-stream "your prompt"
57
  ```
58
 
59
+ ## Sharing Traces
60
+
61
+ Every session is auto-uploaded to your **own private Hugging Face dataset**
62
+ in [Claude Code JSONL format](https://huggingface.co/changelog/agent-trace-viewer),
63
+ which the HF Agent Trace Viewer auto-detects so you can browse turns, tool
64
+ calls, and model responses directly on the Hub.
65
+
66
+ By default the dataset is named `{your-hf-username}/ml-intern-sessions` and is
67
+ **created private**. You can flip it to public from inside the CLI:
68
+
69
+ ```bash
70
+ /share-traces # show current visibility + dataset URL
71
+ /share-traces public # publish (anyone can view)
72
+ /share-traces private # lock it back down
73
+ ```
74
+
75
+ You can also flip visibility from the dataset page on huggingface.co — the
76
+ agent honours whatever you set there for subsequent uploads.
77
+
78
+ To opt out entirely, set in your CLI config (e.g. `configs/cli_agent_config.json`
79
+ or `~/.config/ml-intern/cli_agent_config.json`):
80
+
81
+ ```json
82
+ { "share_traces": false }
83
+ ```
84
+
85
+ To override the destination repo, set:
86
+
87
+ ```json
88
+ { "personal_trace_repo_template": "{hf_user}/my-custom-traces" }
89
+ ```
90
+
91
+ The shared `smolagents/ml-intern-sessions` dataset is unrelated and only
92
+ receives anonymized telemetry rows used by the backend KPI scheduler.
93
+
94
  ## Supported Gateways
95
 
96
  ML Intern currently supports one-way notification gateways from CLI sessions.
agent/config.py CHANGED
@@ -27,6 +27,13 @@ class Config(BaseModel):
27
  mcpServers: dict[str, MCPServerConfig] = {}
28
  save_sessions: bool = True
29
  session_dataset_repo: str = "smolagents/ml-intern-sessions"
 
 
 
 
 
 
 
30
  auto_save_interval: int = 1 # Save every N user turns (0 = disabled)
31
  # Mid-turn heartbeat: save + upload every N seconds while events are being
32
  # emitted. Guards against losing trace data on long-running turns that
 
27
  mcpServers: dict[str, MCPServerConfig] = {}
28
  save_sessions: bool = True
29
  session_dataset_repo: str = "smolagents/ml-intern-sessions"
30
+ # Per-user private dataset that mirrors each session in Claude Code JSONL
31
+ # format so the HF Agent Trace Viewer auto-renders it
32
+ # (https://huggingface.co/changelog/agent-trace-viewer). Created private
33
+ # on first use; user flips it public via /share-traces. ``{hf_user}`` is
34
+ # substituted at upload time from ``Session.user_id``.
35
+ share_traces: bool = True
36
+ personal_trace_repo_template: str = "{hf_user}/ml-intern-sessions"
37
  auto_save_interval: int = 1 # Save every N user turns (0 = disabled)
38
  # Mid-turn heartbeat: save + upload every N seconds while events are being
39
  # emitted. Guards against losing trace data on long-running turns that
agent/context_manager/manager.py CHANGED
@@ -240,6 +240,8 @@ class ContextManager:
240
  """Add a message to the history"""
241
  if token_count:
242
  self.running_context_usage = token_count
 
 
243
  self.items.append(message)
244
  if self.on_message_added:
245
  self.on_message_added(message)
@@ -312,6 +314,7 @@ class ContextManager:
312
  content="Tool was not executed (interrupted or error).",
313
  tool_call_id=tc.id,
314
  name=tc.function.name,
 
315
  )
316
  )
317
 
@@ -424,7 +427,11 @@ class ContextManager:
424
  session=session,
425
  kind="compaction",
426
  )
427
- summarized_message = Message(role="assistant", content=summary)
 
 
 
 
428
 
429
  # Reconstruct: system + first user msg + summary + recent messages
430
  head = [system_msg] if system_msg else []
 
240
  """Add a message to the history"""
241
  if token_count:
242
  self.running_context_usage = token_count
243
+ if not getattr(message, "timestamp", None):
244
+ message.timestamp = datetime.now().isoformat()
245
  self.items.append(message)
246
  if self.on_message_added:
247
  self.on_message_added(message)
 
314
  content="Tool was not executed (interrupted or error).",
315
  tool_call_id=tc.id,
316
  name=tc.function.name,
317
+ timestamp=datetime.now().isoformat(),
318
  )
319
  )
320
 
 
427
  session=session,
428
  kind="compaction",
429
  )
430
+ summarized_message = Message(
431
+ role="assistant",
432
+ content=summary,
433
+ timestamp=datetime.now().isoformat(),
434
+ )
435
 
436
  # Reconstruct: system + first user msg + summary + recent messages
437
  head = [system_msg] if system_msg else []
agent/core/agent_loop.py CHANGED
@@ -1579,10 +1579,14 @@ async def submission_loop(
1579
  session_holder[0] = session
1580
  logger.info("Agent loop started")
1581
 
1582
- # Retry any failed uploads from previous sessions (fire-and-forget)
 
 
1583
  if config and config.save_sessions:
1584
  Session.retry_failed_uploads_detached(
1585
- directory="session_logs", repo_id=config.session_dataset_repo
 
 
1586
  )
1587
 
1588
  try:
 
1579
  session_holder[0] = session
1580
  logger.info("Agent loop started")
1581
 
1582
+ # Retry any failed uploads from previous sessions (fire-and-forget).
1583
+ # Includes the personal trace repo when enabled so a session that failed
1584
+ # to publish to the user's HF dataset gets a fresh attempt on next run.
1585
  if config and config.save_sessions:
1586
  Session.retry_failed_uploads_detached(
1587
+ directory="session_logs",
1588
+ repo_id=config.session_dataset_repo,
1589
+ personal_repo_id=session._personal_trace_repo_id(),
1590
  )
1591
 
1592
  try:
agent/core/session.py CHANGED
@@ -1,6 +1,7 @@
1
  import asyncio
2
  import json
3
  import logging
 
4
  import subprocess
5
  import sys
6
  import uuid
@@ -456,62 +457,173 @@ class Session:
456
  logger.error(f"Failed to update local save status: {e}")
457
  return False
458
 
459
- def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
460
- """
461
- Save session locally and spawn detached subprocess for upload (fire-and-forget)
462
-
463
- Args:
464
- repo_id: HuggingFace dataset repo ID
465
 
466
- Returns:
467
- Path to local save file
 
468
  """
469
- # Save locally first (fast, synchronous)
470
- local_path = self.save_trajectory_local(upload_status="pending")
471
- if not local_path:
 
 
 
 
 
 
 
 
472
  return None
473
 
474
- # Spawn detached subprocess for upload (fire-and-forget)
 
 
 
 
 
 
 
 
 
 
 
475
  try:
476
  uploader_script = Path(__file__).parent / "session_uploader.py"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
- # Use Popen with detached process
479
  subprocess.Popen(
480
- [sys.executable, str(uploader_script), "upload", local_path, repo_id],
481
  stdin=subprocess.DEVNULL,
482
  stdout=subprocess.DEVNULL,
483
  stderr=subprocess.DEVNULL,
 
484
  start_new_session=True, # Detach from parent
485
  )
486
  except Exception as e:
487
  logger.warning(f"Failed to spawn upload subprocess: {e}")
488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  return local_path
490
 
491
  @staticmethod
492
  def retry_failed_uploads_detached(
493
- directory: str = "session_logs", repo_id: Optional[str] = None
 
 
 
494
  ) -> None:
495
  """
496
- Spawn detached subprocess to retry failed/pending uploads (fire-and-forget)
 
497
 
498
  Args:
499
  directory: Directory containing session logs
500
- repo_id: Target dataset repo ID
 
 
501
  """
502
- if not repo_id:
503
  return
504
 
505
  try:
506
  uploader_script = Path(__file__).parent / "session_uploader.py"
507
 
508
- # Spawn detached subprocess for retry
509
- subprocess.Popen(
510
- [sys.executable, str(uploader_script), "retry", directory, repo_id],
511
- stdin=subprocess.DEVNULL,
512
- stdout=subprocess.DEVNULL,
513
- stderr=subprocess.DEVNULL,
514
- start_new_session=True, # Detach from parent
515
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  except Exception as e:
517
  logger.warning(f"Failed to spawn retry subprocess: {e}")
 
1
  import asyncio
2
  import json
3
  import logging
4
+ import os
5
  import subprocess
6
  import sys
7
  import uuid
 
457
  logger.error(f"Failed to update local save status: {e}")
458
  return False
459
 
460
+ def _personal_trace_repo_id(self) -> Optional[str]:
461
+ """Resolve the per-user trace repo id from config + user_id.
 
 
 
 
462
 
463
+ Returns ``None`` when sharing is disabled, the user is anonymous,
464
+ or the template is missing — caller skips the personal upload in
465
+ those cases.
466
  """
467
+ if not getattr(self.config, "share_traces", False):
468
+ return None
469
+ if not self.user_id:
470
+ return None
471
+ template = getattr(self.config, "personal_trace_repo_template", None)
472
+ if not template:
473
+ return None
474
+ try:
475
+ return template.format(hf_user=self.user_id)
476
+ except (KeyError, IndexError):
477
+ logger.debug("personal_trace_repo_template format failed: %r", template)
478
  return None
479
 
480
+ def _spawn_uploader(
481
+ self,
482
+ action: str,
483
+ target: str,
484
+ repo_id: str,
485
+ *,
486
+ format: str,
487
+ token_env: Optional[str],
488
+ private: bool,
489
+ token_value: Optional[str] = None,
490
+ ) -> None:
491
+ """Fire-and-forget spawn of ``session_uploader.py`` with the given args."""
492
  try:
493
  uploader_script = Path(__file__).parent / "session_uploader.py"
494
+ cmd = [
495
+ sys.executable,
496
+ str(uploader_script),
497
+ action,
498
+ target,
499
+ repo_id,
500
+ "--format",
501
+ format,
502
+ "--private",
503
+ "true" if private else "false",
504
+ ]
505
+ if token_env:
506
+ cmd.extend(["--token-env", token_env])
507
+
508
+ env = os.environ.copy()
509
+ if token_value:
510
+ env["_ML_INTERN_PERSONAL_TOKEN"] = token_value
511
 
 
512
  subprocess.Popen(
513
+ cmd,
514
  stdin=subprocess.DEVNULL,
515
  stdout=subprocess.DEVNULL,
516
  stderr=subprocess.DEVNULL,
517
+ env=env,
518
  start_new_session=True, # Detach from parent
519
  )
520
  except Exception as e:
521
  logger.warning(f"Failed to spawn upload subprocess: {e}")
522
 
523
+ def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
524
+ """
525
+ Save session locally and spawn detached subprocess(es) for upload
526
+ (fire-and-forget).
527
+
528
+ Always uploads to the shared org dataset (``repo_id``) in the
529
+ single-row format used by the KPI scheduler. When
530
+ ``config.share_traces`` is enabled and a username is known, also
531
+ uploads to the user's personal private dataset in Claude Code JSONL
532
+ format so the HF Agent Trace Viewer auto-renders it.
533
+
534
+ Args:
535
+ repo_id: HuggingFace dataset repo ID for the org/KPI upload.
536
+
537
+ Returns:
538
+ Path to local save file
539
+ """
540
+ local_path = self.save_trajectory_local(upload_status="pending")
541
+ if not local_path:
542
+ return None
543
+
544
+ self._spawn_uploader(
545
+ "upload",
546
+ local_path,
547
+ repo_id,
548
+ format="row",
549
+ token_env=None, # default org token chain
550
+ private=False,
551
+ )
552
+
553
+ personal_repo = self._personal_trace_repo_id()
554
+ if personal_repo:
555
+ # User's own HF_TOKEN write-scoped to their namespace.
556
+ self._spawn_uploader(
557
+ "upload",
558
+ local_path,
559
+ personal_repo,
560
+ format="claude_code",
561
+ token_env="HF_TOKEN",
562
+ token_value=self.hf_token,
563
+ private=True,
564
+ )
565
+
566
  return local_path
567
 
568
  @staticmethod
569
  def retry_failed_uploads_detached(
570
+ directory: str = "session_logs",
571
+ repo_id: Optional[str] = None,
572
+ *,
573
+ personal_repo_id: Optional[str] = None,
574
  ) -> None:
575
  """
576
+ Spawn detached subprocess(es) to retry failed/pending uploads
577
+ (fire-and-forget).
578
 
579
  Args:
580
  directory: Directory containing session logs
581
+ repo_id: Target dataset repo ID for the shared org/KPI upload.
582
+ personal_repo_id: Per-user dataset for Claude-Code-format
583
+ retries. ``None`` skips the personal retry pass.
584
  """
585
+ if not repo_id and not personal_repo_id:
586
  return
587
 
588
  try:
589
  uploader_script = Path(__file__).parent / "session_uploader.py"
590
 
591
+ if repo_id:
592
+ subprocess.Popen(
593
+ [
594
+ sys.executable,
595
+ str(uploader_script),
596
+ "retry",
597
+ directory,
598
+ repo_id,
599
+ "--format",
600
+ "row",
601
+ ],
602
+ stdin=subprocess.DEVNULL,
603
+ stdout=subprocess.DEVNULL,
604
+ stderr=subprocess.DEVNULL,
605
+ start_new_session=True,
606
+ )
607
+
608
+ if personal_repo_id:
609
+ subprocess.Popen(
610
+ [
611
+ sys.executable,
612
+ str(uploader_script),
613
+ "retry",
614
+ directory,
615
+ personal_repo_id,
616
+ "--format",
617
+ "claude_code",
618
+ "--token-env",
619
+ "HF_TOKEN",
620
+ "--private",
621
+ "true",
622
+ ],
623
+ stdin=subprocess.DEVNULL,
624
+ stdout=subprocess.DEVNULL,
625
+ stderr=subprocess.DEVNULL,
626
+ start_new_session=True,
627
+ )
628
  except Exception as e:
629
  logger.warning(f"Failed to spawn retry subprocess: {e}")
agent/core/session_uploader.py CHANGED
@@ -3,39 +3,455 @@
3
  Standalone script for uploading session trajectories to HuggingFace.
4
  This runs as a separate process to avoid blocking the main agent.
5
  Uses individual file uploads to avoid race conditions.
 
 
 
 
 
 
 
 
 
 
6
  """
7
 
 
 
8
  import json
9
  import os
10
  import sys
11
  from datetime import datetime
12
  from pathlib import Path
 
13
 
14
  from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
18
- # Token for session uploads. Fallback chain (least-privilege first) — matches
19
- # backend/kpis_scheduler.py so one write-scoped token on the Space covers every
20
- # telemetry dataset. Never hardcode tokens in source.
21
- _SESSION_TOKEN = (
22
- os.environ.get("HF_SESSION_UPLOAD_TOKEN")
23
- or os.environ.get("HF_TOKEN")
24
- or os.environ.get("HF_ADMIN_TOKEN")
25
- or ""
26
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def upload_session_as_file(
30
- session_file: str, repo_id: str, max_retries: int = 3
 
 
 
 
 
31
  ) -> bool:
32
- """
33
- Upload a single session as an individual JSONL file (no race conditions)
34
 
35
  Args:
36
  session_file: Path to local session JSON file
37
  repo_id: HuggingFace dataset repo ID
38
  max_retries: Number of retry attempts
 
 
 
 
 
 
39
 
40
  Returns:
41
  True if successful, False otherwise
@@ -46,96 +462,60 @@ def upload_session_as_file(
46
  print("Error: huggingface_hub library not available", file=sys.stderr)
47
  return False
48
 
 
 
 
49
  try:
50
- # Load session data
51
- with open(session_file, "r") as f:
52
- data = json.load(f)
53
 
54
- # Check if already uploaded
55
- upload_status = data.get("upload_status")
56
- if upload_status == "success":
57
  return True
58
 
59
- # Use dedicated session upload token (write-only access to session dataset)
60
- hf_token = _SESSION_TOKEN
61
  if not hf_token:
62
- # Update status to failed
63
- data["upload_status"] = "failed"
64
- with open(session_file, "w") as f:
65
- json.dump(data, f, indent=2)
66
  return False
67
 
68
- # Scrub secrets (HF tokens, API keys, etc.) from messages + events
69
- # before they leave the local disk. Best-effort regex-based redaction —
70
- # see agent/core/redact.py for the patterns covered.
71
- try:
72
- from agent.core.redact import scrub # type: ignore
73
- except Exception:
74
- # Fallback for environments where the agent package isn't importable
75
- # (shouldn't happen in our subprocess, but be defensive).
76
- import importlib.util
77
- _spec = importlib.util.spec_from_file_location(
78
- "_redact",
79
- Path(__file__).parent / "redact.py",
80
- )
81
- _mod = importlib.util.module_from_spec(_spec)
82
- _spec.loader.exec_module(_mod) # type: ignore
83
- scrub = _mod.scrub
84
- scrubbed_messages = scrub(data["messages"])
85
- scrubbed_events = scrub(data["events"])
86
- scrubbed_tools = scrub(data.get("tools") or [])
87
-
88
- # Prepare JSONL content (single line)
89
- # Store messages/events/tools as JSON strings to avoid schema conflicts
90
- # across sessions with different tool rosters.
91
- session_row = {
92
- "session_id": data["session_id"],
93
- "user_id": data.get("user_id"),
94
- "session_start_time": data["session_start_time"],
95
- "session_end_time": data["session_end_time"],
96
- "model_name": data["model_name"],
97
- "total_cost_usd": data.get("total_cost_usd"),
98
- "messages": json.dumps(scrubbed_messages),
99
- "events": json.dumps(scrubbed_events),
100
- "tools": json.dumps(scrubbed_tools),
101
- }
102
-
103
- # Create temporary JSONL file
104
  import tempfile
105
 
106
  with tempfile.NamedTemporaryFile(
107
  mode="w", suffix=".jsonl", delete=False
108
  ) as tmp:
109
- json.dump(session_row, tmp) # Single line JSON
110
  tmp_path = tmp.name
111
 
112
  try:
113
- # Generate unique path in repo: sessions/YYYY-MM-DD/session_id.jsonl
 
 
 
 
114
  session_id = data["session_id"]
115
  date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
116
  "%Y-%m-%d"
117
  )
118
  repo_path = f"sessions/{date_str}/{session_id}.jsonl"
119
 
120
- # Upload with retries
121
  api = HfApi()
122
  for attempt in range(max_retries):
123
  try:
124
- # Try to create repo if it doesn't exist (idempotent)
 
 
125
  try:
126
  api.create_repo(
127
  repo_id=repo_id,
128
  repo_type="dataset",
129
- private=False,
130
  token=hf_token,
131
- exist_ok=True, # Don't fail if already exists
132
  )
133
-
134
  except Exception:
135
- # Repo might already exist, continue
136
  pass
137
 
138
- # Upload the session file
 
139
  api.upload_file(
140
  path_or_fileobj=tmp_path,
141
  path_in_repo=repo_path,
@@ -145,12 +525,13 @@ def upload_session_as_file(
145
  commit_message=f"Add session {session_id}",
146
  )
147
 
148
- # Update local status to success
149
- data["upload_status"] = "success"
150
- data["upload_url"] = f"https://huggingface.co/datasets/{repo_id}"
151
- with open(session_file, "w") as f:
152
- json.dump(data, f, indent=2)
153
-
 
154
  return True
155
 
156
  except Exception:
@@ -160,14 +541,12 @@ def upload_session_as_file(
160
  wait_time = 2**attempt
161
  time.sleep(wait_time)
162
  else:
163
- # Final attempt failed
164
- data["upload_status"] = "failed"
165
- with open(session_file, "w") as f:
166
- json.dump(data, f, indent=2)
167
  return False
168
 
169
  finally:
170
- # Clean up temp file
171
  try:
172
  os.unlink(tmp_path)
173
  except Exception:
@@ -178,56 +557,102 @@ def upload_session_as_file(
178
  return False
179
 
180
 
181
- def retry_failed_uploads(directory: str, repo_id: str):
182
- """Retry all failed/pending uploads in a directory"""
 
 
 
 
 
 
183
  log_dir = Path(directory)
184
  if not log_dir.exists():
185
  return
186
 
 
187
  session_files = list(log_dir.glob("session_*.json"))
188
 
189
  for filepath in session_files:
190
  try:
191
- with open(filepath, "r") as f:
192
- data = json.load(f)
193
-
194
- upload_status = data.get("upload_status", "unknown")
195
-
196
- # Only retry pending or failed uploads
197
- if upload_status in ["pending", "failed"]:
198
- upload_session_as_file(str(filepath), repo_id)
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  except Exception:
201
  pass
202
 
203
 
 
 
 
 
204
  if __name__ == "__main__":
205
- if len(sys.argv) < 3:
206
- print("Usage: session_uploader.py <command> <args...>")
207
- sys.exit(1)
208
-
209
- command = sys.argv[1]
210
-
211
- if command == "upload":
212
- # python session_uploader.py upload <session_file> <repo_id>
213
- if len(sys.argv) < 4:
214
- print("Usage: session_uploader.py upload <session_file> <repo_id>")
215
- sys.exit(1)
216
- session_file = sys.argv[2]
217
- repo_id = sys.argv[3]
218
- success = upload_session_as_file(session_file, repo_id)
219
- sys.exit(0 if success else 1)
220
-
221
- elif command == "retry":
222
- # python session_uploader.py retry <directory> <repo_id>
223
- if len(sys.argv) < 4:
224
- print("Usage: session_uploader.py retry <directory> <repo_id>")
225
- sys.exit(1)
226
- directory = sys.argv[2]
227
- repo_id = sys.argv[3]
228
- retry_failed_uploads(directory, repo_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  sys.exit(0)
230
 
231
- else:
232
- print(f"Unknown command: {command}")
233
- sys.exit(1)
 
3
  Standalone script for uploading session trajectories to HuggingFace.
4
  This runs as a separate process to avoid blocking the main agent.
5
  Uses individual file uploads to avoid race conditions.
6
+
7
+ Two formats are supported:
8
+
9
+ * ``row`` — single-line JSONL row used by the existing org telemetry/KPI
10
+ pipeline (``smolagents/ml-intern-sessions``). Compatible with
11
+ ``backend/kpis_scheduler.py``.
12
+ * ``claude_code`` — one event per line in the Claude Code JSONL schema,
13
+ auto-detected by the HF Agent Trace Viewer
14
+ (https://huggingface.co/changelog/agent-trace-viewer). Used for the
15
+ per-user private dataset (default ``{hf_user}/ml-intern-sessions``).
16
  """
17
 
18
+ import argparse
19
+ import hashlib
20
  import json
21
  import os
22
  import sys
23
  from datetime import datetime
24
  from pathlib import Path
25
+ from typing import Any
26
 
27
  from dotenv import load_dotenv
28
 
29
  load_dotenv()
30
 
31
+ # Token resolution for the org KPI dataset. Fallback chain (least-privilege
32
+ # first) — matches backend/kpis_scheduler.py so one write-scoped token on the
33
+ # Space covers every telemetry dataset. Never hardcode tokens in source.
34
+ _ORG_TOKEN_FALLBACK_CHAIN = (
35
+ "HF_SESSION_UPLOAD_TOKEN",
36
+ "HF_TOKEN",
37
+ "HF_ADMIN_TOKEN",
 
38
  )
39
+ _PERSONAL_TOKEN_ENV = "_ML_INTERN_PERSONAL_TOKEN"
40
+
41
+
42
+ def _resolve_token(token_env: str | None) -> str:
43
+ """Resolve an HF token from env. ``token_env`` overrides the fallback chain."""
44
+ if token_env == "HF_TOKEN":
45
+ try:
46
+ from agent.core.hf_tokens import resolve_hf_token
47
+
48
+ return (
49
+ resolve_hf_token(
50
+ os.environ.get(_PERSONAL_TOKEN_ENV),
51
+ os.environ.get("HF_TOKEN"),
52
+ )
53
+ or ""
54
+ )
55
+ except Exception:
56
+ token = os.environ.get(_PERSONAL_TOKEN_ENV) or os.environ.get("HF_TOKEN")
57
+ return token or ""
58
+
59
+ if token_env:
60
+ return os.environ.get(token_env, "") or ""
61
+ for var in _ORG_TOKEN_FALLBACK_CHAIN:
62
+ val = os.environ.get(var)
63
+ if val:
64
+ return val
65
+ return ""
66
+
67
+
68
+ def _scrub(obj: Any) -> Any:
69
+ """Best-effort regex scrub for HF tokens / API keys before upload."""
70
+ try:
71
+ from agent.core.redact import scrub # type: ignore
72
+ except Exception:
73
+ # Fallback for environments where the agent package isn't importable
74
+ # (shouldn't happen in our subprocess, but be defensive).
75
+ import importlib.util
76
+
77
+ _spec = importlib.util.spec_from_file_location(
78
+ "_redact",
79
+ Path(__file__).parent / "redact.py",
80
+ )
81
+ _mod = importlib.util.module_from_spec(_spec)
82
+ _spec.loader.exec_module(_mod) # type: ignore
83
+ scrub = _mod.scrub
84
+ return scrub(obj)
85
+
86
+
87
+ def _msg_uuid(session_id: str, role: str, idx: int) -> str:
88
+ """Deterministic UUID-shaped id for a Claude Code message.
89
+
90
+ Uses sha1 of ``session_id::role::idx`` so re-uploads/heartbeats keep the
91
+ parent/child chain stable. Same convention as the example dataset
92
+ https://huggingface.co/datasets/clem/hf-coding-tools-traces.
93
+ """
94
+ digest = hashlib.sha1(f"{session_id}::{role}::{idx}".encode("utf-8")).hexdigest()
95
+ # Format like a UUID for visual familiarity (32 hex chars w/ dashes).
96
+ return (
97
+ f"{digest[0:8]}-{digest[8:12]}-{digest[12:16]}-"
98
+ f"{digest[16:20]}-{digest[20:32]}"
99
+ )
100
+
101
+
102
+ def _content_to_text(content: Any) -> str:
103
+ """Best-effort flatten of a litellm/openai content field to plain text."""
104
+ if content is None:
105
+ return ""
106
+ if isinstance(content, str):
107
+ return content
108
+ if isinstance(content, list):
109
+ parts: list[str] = []
110
+ for block in content:
111
+ if isinstance(block, dict):
112
+ text = block.get("text")
113
+ if isinstance(text, str):
114
+ parts.append(text)
115
+ else:
116
+ # Unknown content block — keep round-trippable representation.
117
+ parts.append(json.dumps(block, default=str))
118
+ else:
119
+ parts.append(str(block))
120
+ return "\n".join(parts)
121
+ return str(content)
122
+
123
+
124
+ def _parse_tool_args(raw: Any) -> Any:
125
+ """Tool call arguments arrive as a JSON-encoded string from LLMs."""
126
+ if isinstance(raw, dict):
127
+ return raw
128
+ if isinstance(raw, str):
129
+ try:
130
+ return json.loads(raw)
131
+ except (json.JSONDecodeError, TypeError):
132
+ return {"_raw": raw}
133
+ return raw
134
+
135
+
136
+ def to_claude_code_jsonl(trajectory: dict) -> list[dict]:
137
+ """Convert an internal trajectory dict to Claude Code JSONL events.
138
+
139
+ Schema reference (per the HF Agent Trace Viewer auto-detector):
140
+
141
+ {"type":"user","message":{"role":"user","content":"..."},
142
+ "uuid":"...","parentUuid":null,"sessionId":"...","timestamp":"..."}
143
+ {"type":"assistant",
144
+ "message":{"role":"assistant","model":"...",
145
+ "content":[{"type":"text","text":"..."},
146
+ {"type":"tool_use","id":"...","name":"...","input":{...}}]},
147
+ "uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
148
+ {"type":"user","message":{"role":"user",
149
+ "content":[{"type":"tool_result",
150
+ "tool_use_id":"...","content":"..."}]},
151
+ "uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
152
+
153
+ System messages are skipped (they're not part of the viewer schema and
154
+ contain large prompts that pollute the trace viewer UI).
155
+ """
156
+ session_id = trajectory["session_id"]
157
+ model_name = trajectory.get("model_name") or ""
158
+ fallback_timestamp = (
159
+ trajectory.get("session_start_time") or datetime.now().isoformat()
160
+ )
161
+ messages: list[dict] = trajectory.get("messages") or []
162
+
163
+ out: list[dict] = []
164
+ parent_uuid: str | None = None
165
+
166
+ for idx, msg in enumerate(messages):
167
+ if not isinstance(msg, dict):
168
+ continue
169
+ role = msg.get("role")
170
+ if role == "system":
171
+ continue
172
+ timestamp = msg.get("timestamp") or fallback_timestamp
173
+
174
+ if role == "user":
175
+ content = _content_to_text(msg.get("content"))
176
+ event_uuid = _msg_uuid(session_id, "user", idx)
177
+ out.append(
178
+ {
179
+ "type": "user",
180
+ "message": {"role": "user", "content": content},
181
+ "uuid": event_uuid,
182
+ "parentUuid": parent_uuid,
183
+ "sessionId": session_id,
184
+ "timestamp": timestamp,
185
+ }
186
+ )
187
+ parent_uuid = event_uuid
188
+
189
+ elif role == "assistant":
190
+ content_text = _content_to_text(msg.get("content"))
191
+ content_blocks: list[dict] = []
192
+ if content_text:
193
+ content_blocks.append({"type": "text", "text": content_text})
194
+ for tc in msg.get("tool_calls") or []:
195
+ if not isinstance(tc, dict):
196
+ continue
197
+ fn = tc.get("function") or {}
198
+ content_blocks.append(
199
+ {
200
+ "type": "tool_use",
201
+ "id": tc.get("id") or "",
202
+ "name": fn.get("name") or "",
203
+ "input": _parse_tool_args(fn.get("arguments")),
204
+ }
205
+ )
206
+ if not content_blocks:
207
+ # Edge case: empty assistant turn (shouldn't normally happen,
208
+ # but skip rather than emit an empty content array which
209
+ # confuses the viewer).
210
+ continue
211
+ event_uuid = _msg_uuid(session_id, "assistant", idx)
212
+ out.append(
213
+ {
214
+ "type": "assistant",
215
+ "message": {
216
+ "role": "assistant",
217
+ "model": model_name,
218
+ "content": content_blocks,
219
+ },
220
+ "uuid": event_uuid,
221
+ "parentUuid": parent_uuid,
222
+ "sessionId": session_id,
223
+ "timestamp": timestamp,
224
+ }
225
+ )
226
+ parent_uuid = event_uuid
227
+
228
+ elif role == "tool":
229
+ tool_call_id = msg.get("tool_call_id") or ""
230
+ content_text = _content_to_text(msg.get("content"))
231
+ event_uuid = _msg_uuid(session_id, "tool", idx)
232
+ out.append(
233
+ {
234
+ "type": "user",
235
+ "message": {
236
+ "role": "user",
237
+ "content": [
238
+ {
239
+ "type": "tool_result",
240
+ "tool_use_id": tool_call_id,
241
+ "content": content_text,
242
+ }
243
+ ],
244
+ },
245
+ "uuid": event_uuid,
246
+ "parentUuid": parent_uuid,
247
+ "sessionId": session_id,
248
+ "timestamp": timestamp,
249
+ }
250
+ )
251
+ parent_uuid = event_uuid
252
+
253
+ return out
254
+
255
+
256
+ def _scrub_session_for_upload(data: dict) -> dict:
257
+ """Best-effort scrub of transcript fields before any upload temp file."""
258
+ scrubbed = dict(data)
259
+ scrubbed["messages"] = _scrub(data.get("messages") or [])
260
+ scrubbed["events"] = _scrub(data.get("events") or [])
261
+ scrubbed["tools"] = _scrub(data.get("tools") or [])
262
+ return scrubbed
263
+
264
+
265
+ def _write_row_payload(data: dict, tmp_path: str) -> None:
266
+ """Single-row JSONL (existing format) — used by KPI scheduler."""
267
+ scrubbed = _scrub_session_for_upload(data)
268
+ session_row = {
269
+ "session_id": data["session_id"],
270
+ "user_id": data.get("user_id"),
271
+ "session_start_time": data["session_start_time"],
272
+ "session_end_time": data["session_end_time"],
273
+ "model_name": data["model_name"],
274
+ "total_cost_usd": data.get("total_cost_usd"),
275
+ "messages": json.dumps(scrubbed["messages"]),
276
+ "events": json.dumps(scrubbed["events"]),
277
+ "tools": json.dumps(scrubbed["tools"]),
278
+ }
279
+
280
+ with open(tmp_path, "w") as tmp:
281
+ json.dump(session_row, tmp)
282
+
283
+
284
+ def _write_claude_code_payload(data: dict, tmp_path: str) -> None:
285
+ """Multi-line JSONL in Claude Code schema for the HF trace viewer."""
286
+ # Scrub before conversion so secrets never reach the upload temp file.
287
+ scrubbed = _scrub_session_for_upload(data)
288
+ events = to_claude_code_jsonl(scrubbed)
289
+ with open(tmp_path, "w") as tmp:
290
+ for event in events:
291
+ tmp.write(json.dumps(event))
292
+ tmp.write("\n")
293
+
294
+
295
+ def _status_field(format: str) -> str:
296
+ """Per-format upload status field on the local trajectory file."""
297
+ return "personal_upload_status" if format == "claude_code" else "upload_status"
298
+
299
+
300
+ def _url_field(format: str) -> str:
301
+ return "personal_upload_url" if format == "claude_code" else "upload_url"
302
+
303
+
304
+ def _read_session_file(session_file: str) -> dict:
305
+ """Read a local session file while respecting uploader file locks."""
306
+ import fcntl
307
+
308
+ with open(session_file, "r") as f:
309
+ fcntl.flock(f, fcntl.LOCK_SH)
310
+ try:
311
+ return json.load(f)
312
+ finally:
313
+ fcntl.flock(f, fcntl.LOCK_UN)
314
+
315
+
316
+ def _update_upload_status(
317
+ session_file: str,
318
+ status_key: str,
319
+ url_key: str,
320
+ status: str,
321
+ dataset_url: str | None = None,
322
+ ) -> None:
323
+ """Atomically update only this uploader's status fields.
324
+
325
+ The org and personal uploaders run as separate processes against the same
326
+ local session JSON file. Re-read under an exclusive lock so one uploader
327
+ cannot clobber fields written by the other.
328
+ """
329
+ import fcntl
330
+
331
+ with open(session_file, "r+") as f:
332
+ fcntl.flock(f, fcntl.LOCK_EX)
333
+ try:
334
+ data = json.load(f)
335
+ data[status_key] = status
336
+ if dataset_url is not None:
337
+ data[url_key] = dataset_url
338
+ data["last_save_time"] = datetime.now().isoformat()
339
+ f.seek(0)
340
+ json.dump(data, f, indent=2)
341
+ f.truncate()
342
+ f.flush()
343
+ os.fsync(f.fileno())
344
+ finally:
345
+ fcntl.flock(f, fcntl.LOCK_UN)
346
+
347
+
348
+ def dataset_card_readme(repo_id: str) -> str:
349
+ """Dataset card for personal ML Intern session trace repos."""
350
+ return f"""---
351
+ pretty_name: "ML Intern Session Traces"
352
+ language:
353
+ - en
354
+ license: other
355
+ task_categories:
356
+ - text-generation
357
+ tags:
358
+ - agent-traces
359
+ - coding-agent
360
+ - ml-intern
361
+ - session-traces
362
+ - claude-code
363
+ - hf-agent-trace-viewer
364
+ configs:
365
+ - config_name: default
366
+ data_files:
367
+ - split: train
368
+ path: "sessions/**/*.jsonl"
369
+ ---
370
+
371
+ # ML Intern session traces
372
+
373
+ This dataset contains ML Intern coding agent session traces uploaded from local
374
+ ML Intern runs. The traces are stored as JSON Lines files under `sessions/`,
375
+ with one file per session.
376
+
377
+ ## Links
378
+
379
+ - ML Intern demo: https://smolagents-ml-intern.hf.space
380
+ - ML Intern CLI: https://github.com/huggingface/ml-intern
381
+
382
+ ## Data description
383
+
384
+ Each `*.jsonl` file contains a single ML Intern session converted to a
385
+ Claude-Code-style event stream for the Hugging Face Agent Trace Viewer. Entries
386
+ can include user messages, assistant messages, tool calls, tool results, model
387
+ metadata, and timestamps.
388
+
389
+ Session files are written to paths of the form:
390
+
391
+ ```text
392
+ sessions/YYYY-MM-DD/<session_id>.jsonl
393
+ ```
394
+
395
+ ## Redaction and review
396
+
397
+ **WARNING: no comprehensive redaction or human review has been performed for this dataset.**
398
+
399
+ ML Intern applies automated best-effort scrubbing for common secret patterns
400
+ such as Hugging Face, Anthropic, OpenAI, GitHub, and AWS tokens before upload.
401
+ This is not a privacy guarantee.
402
+
403
+ These traces may contain sensitive information, including prompts, code,
404
+ terminal output, file paths, repository names, private task context, tool
405
+ outputs, or other data from the local development environment. Treat every
406
+ session as potentially sensitive.
407
+
408
+ Do not make this dataset public unless you have manually inspected the uploaded
409
+ sessions and are comfortable sharing their full contents.
410
+
411
+ ## Limitations
412
+
413
+ Coding agent transcripts can include private or off-topic content, failed
414
+ experiments, credentials accidentally pasted by a user, and outputs copied from
415
+ local files or services. Use with appropriate caution, especially before
416
+ changing repository visibility.
417
+ """
418
+
419
+
420
+ def _upload_dataset_card(api: Any, repo_id: str, token: str, format: str) -> None:
421
+ """Create/update a README for personal trace datasets."""
422
+ if format != "claude_code":
423
+ return
424
+
425
+ api.upload_file(
426
+ path_or_fileobj=dataset_card_readme(repo_id).encode("utf-8"),
427
+ path_in_repo="README.md",
428
+ repo_id=repo_id,
429
+ repo_type="dataset",
430
+ token=token,
431
+ commit_message="Update dataset card",
432
+ )
433
 
434
 
435
  def upload_session_as_file(
436
+ session_file: str,
437
+ repo_id: str,
438
+ max_retries: int = 3,
439
+ format: str = "row",
440
+ token_env: str | None = None,
441
+ private: bool = False,
442
  ) -> bool:
443
+ """Upload a single session as an individual JSONL file (no race conditions).
 
444
 
445
  Args:
446
  session_file: Path to local session JSON file
447
  repo_id: HuggingFace dataset repo ID
448
  max_retries: Number of retry attempts
449
+ format: ``row`` (default, KPI-compatible) or ``claude_code`` (HF
450
+ Agent Trace Viewer compatible).
451
+ token_env: Name of the env var holding the HF token. ``None`` falls
452
+ back to the org-token chain (``HF_SESSION_UPLOAD_TOKEN`` →
453
+ ``HF_TOKEN`` → ``HF_ADMIN_TOKEN``).
454
+ private: When creating the repo for the first time, mark it private.
455
 
456
  Returns:
457
  True if successful, False otherwise
 
462
  print("Error: huggingface_hub library not available", file=sys.stderr)
463
  return False
464
 
465
+ status_key = _status_field(format)
466
+ url_key = _url_field(format)
467
+
468
  try:
469
+ data = _read_session_file(session_file)
 
 
470
 
471
+ # Skip if already uploaded for this format.
472
+ if data.get(status_key) == "success":
 
473
  return True
474
 
475
+ hf_token = _resolve_token(token_env)
 
476
  if not hf_token:
477
+ _update_upload_status(session_file, status_key, url_key, "failed")
 
 
 
478
  return False
479
 
480
+ # Build temp upload payload in the requested format.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  import tempfile
482
 
483
  with tempfile.NamedTemporaryFile(
484
  mode="w", suffix=".jsonl", delete=False
485
  ) as tmp:
 
486
  tmp_path = tmp.name
487
 
488
  try:
489
+ if format == "claude_code":
490
+ _write_claude_code_payload(data, tmp_path)
491
+ else:
492
+ _write_row_payload(data, tmp_path)
493
+
494
  session_id = data["session_id"]
495
  date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
496
  "%Y-%m-%d"
497
  )
498
  repo_path = f"sessions/{date_str}/{session_id}.jsonl"
499
 
 
500
  api = HfApi()
501
  for attempt in range(max_retries):
502
  try:
503
+ # Idempotent create visibility is set on first creation
504
+ # only. Existing repos keep whatever the user picked via
505
+ # /share-traces.
506
  try:
507
  api.create_repo(
508
  repo_id=repo_id,
509
  repo_type="dataset",
510
+ private=private,
511
  token=hf_token,
512
+ exist_ok=True,
513
  )
 
514
  except Exception:
 
515
  pass
516
 
517
+ _upload_dataset_card(api, repo_id, hf_token, format)
518
+
519
  api.upload_file(
520
  path_or_fileobj=tmp_path,
521
  path_in_repo=repo_path,
 
525
  commit_message=f"Add session {session_id}",
526
  )
527
 
528
+ _update_upload_status(
529
+ session_file,
530
+ status_key,
531
+ url_key,
532
+ "success",
533
+ f"https://huggingface.co/datasets/{repo_id}",
534
+ )
535
  return True
536
 
537
  except Exception:
 
541
  wait_time = 2**attempt
542
  time.sleep(wait_time)
543
  else:
544
+ _update_upload_status(
545
+ session_file, status_key, url_key, "failed"
546
+ )
 
547
  return False
548
 
549
  finally:
 
550
  try:
551
  os.unlink(tmp_path)
552
  except Exception:
 
557
  return False
558
 
559
 
560
+ def retry_failed_uploads(
561
+ directory: str,
562
+ repo_id: str,
563
+ format: str = "row",
564
+ token_env: str | None = None,
565
+ private: bool = False,
566
+ ):
567
+ """Retry all failed/pending uploads in a directory for the given format."""
568
  log_dir = Path(directory)
569
  if not log_dir.exists():
570
  return
571
 
572
+ status_key = _status_field(format)
573
  session_files = list(log_dir.glob("session_*.json"))
574
 
575
  for filepath in session_files:
576
  try:
577
+ data = _read_session_file(str(filepath))
578
+
579
+ # Only retry pending or failed uploads. Files predating this
580
+ # field don't have it; treat unknown as "not yet attempted" for
581
+ # the row format (legacy behavior) and "skip" for claude_code
582
+ # so we don't suddenly re-upload pre-existing sessions to a
583
+ # newly-introduced personal repo.
584
+ status = data.get(status_key, "unknown")
585
+ if format == "claude_code" and status_key not in data:
586
+ continue
587
+
588
+ if status in ("pending", "failed", "unknown"):
589
+ upload_session_as_file(
590
+ str(filepath),
591
+ repo_id,
592
+ format=format,
593
+ token_env=token_env,
594
+ private=private,
595
+ )
596
 
597
  except Exception:
598
  pass
599
 
600
 
601
+ def _str2bool(v: str) -> bool:
602
+ return str(v).strip().lower() in {"1", "true", "yes", "on"}
603
+
604
+
605
  if __name__ == "__main__":
606
+ parser = argparse.ArgumentParser(prog="session_uploader.py")
607
+ sub = parser.add_subparsers(dest="command", required=True)
608
+
609
+ p_upload = sub.add_parser("upload")
610
+ p_upload.add_argument("session_file")
611
+ p_upload.add_argument("repo_id")
612
+ p_upload.add_argument(
613
+ "--format",
614
+ choices=["row", "claude_code"],
615
+ default="row",
616
+ )
617
+ p_upload.add_argument(
618
+ "--token-env",
619
+ default=None,
620
+ help="Env var name holding the HF token (default: org fallback chain).",
621
+ )
622
+ p_upload.add_argument("--private", default="false")
623
+
624
+ p_retry = sub.add_parser("retry")
625
+ p_retry.add_argument("directory")
626
+ p_retry.add_argument("repo_id")
627
+ p_retry.add_argument(
628
+ "--format",
629
+ choices=["row", "claude_code"],
630
+ default="row",
631
+ )
632
+ p_retry.add_argument("--token-env", default=None)
633
+ p_retry.add_argument("--private", default="false")
634
+
635
+ args = parser.parse_args()
636
+
637
+ if args.command == "upload":
638
+ ok = upload_session_as_file(
639
+ args.session_file,
640
+ args.repo_id,
641
+ format=args.format,
642
+ token_env=args.token_env,
643
+ private=_str2bool(args.private),
644
+ )
645
+ sys.exit(0 if ok else 1)
646
+
647
+ if args.command == "retry":
648
+ retry_failed_uploads(
649
+ args.directory,
650
+ args.repo_id,
651
+ format=args.format,
652
+ token_env=args.token_env,
653
+ private=_str2bool(args.private),
654
+ )
655
  sys.exit(0)
656
 
657
+ parser.print_help()
658
+ sys.exit(1)
 
agent/main.py CHANGED
@@ -807,10 +807,120 @@ async def _handle_slash_command(
807
  print(f"Context items: {len(session.context_manager.items)}")
808
  return None
809
 
 
 
 
 
 
810
  print(f"Unknown command: {command}. Type /help for available commands.")
811
  return None
812
 
813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
814
  async def main(model: str | None = None):
815
  """Interactive chat with the agent"""
816
 
 
807
  print(f"Context items: {len(session.context_manager.items)}")
808
  return None
809
 
810
+ if command == "/share-traces":
811
+ session = session_holder[0] if session_holder else None
812
+ await _handle_share_traces_command(arg, config, session)
813
+ return None
814
+
815
  print(f"Unknown command: {command}. Type /help for available commands.")
816
  return None
817
 
818
 
819
+ async def _handle_share_traces_command(arg: str, config, session) -> None:
820
+ """Show or flip visibility of the user's personal trace dataset.
821
+
822
+ Uses the user's own HF_TOKEN (write-scoped to their namespace). Only
823
+ operates on the personal trace repo configured via
824
+ ``personal_trace_repo_template`` — never touches the shared org dataset.
825
+ """
826
+ from huggingface_hub import HfApi
827
+ from huggingface_hub.utils import HfHubHTTPError
828
+
829
+ console = get_console()
830
+ if session is None:
831
+ console.print("[bold red]No active session.[/bold red]")
832
+ return
833
+
834
+ repo_id = session._personal_trace_repo_id() if session is not None else None
835
+ if not repo_id:
836
+ if not getattr(config, "share_traces", False):
837
+ console.print(
838
+ "[yellow]share_traces is disabled in config. "
839
+ "Set it to true to publish per-session traces to your HF dataset."
840
+ "[/yellow]"
841
+ )
842
+ return
843
+ if not session.user_id:
844
+ console.print(
845
+ "[yellow]No HF username resolved \u2014 cannot pick a personal "
846
+ "trace repo. Set HF_TOKEN to a token tied to your account.[/yellow]"
847
+ )
848
+ return
849
+ console.print(
850
+ "[yellow]personal_trace_repo_template is unset \u2014 nothing to do.[/yellow]"
851
+ )
852
+ return
853
+
854
+ token = session.hf_token or resolve_hf_token()
855
+ if not token:
856
+ console.print(
857
+ "[bold red]No HF_TOKEN available.[/bold red] Cannot read or change "
858
+ "dataset visibility."
859
+ )
860
+ return
861
+
862
+ api = HfApi(token=token)
863
+ url = f"https://huggingface.co/datasets/{repo_id}"
864
+ target = arg.strip().lower()
865
+
866
+ if not target:
867
+ try:
868
+ info = await asyncio.to_thread(
869
+ api.repo_info, repo_id=repo_id, repo_type="dataset"
870
+ )
871
+ visibility = "private" if getattr(info, "private", False) else "public"
872
+ console.print(f"[bold]Trace dataset:[/bold] {url}")
873
+ console.print(f"[bold]Visibility:[/bold] {visibility}")
874
+ console.print(
875
+ "[dim]Use '/share-traces public' to publish, "
876
+ "'/share-traces private' to lock it back down.[/dim]"
877
+ )
878
+ except HfHubHTTPError as e:
879
+ if getattr(e.response, "status_code", None) == 404:
880
+ console.print(
881
+ f"[dim]Dataset {repo_id} doesn't exist yet \u2014 it'll be "
882
+ "created (private) on the next session save.[/dim]"
883
+ )
884
+ else:
885
+ console.print(f"[bold red]Hub error:[/bold red] {e}")
886
+ except Exception as e:
887
+ console.print(f"[bold red]Could not fetch dataset info:[/bold red] {e}")
888
+ return
889
+
890
+ if target not in {"public", "private"}:
891
+ console.print(
892
+ f"[bold red]Unknown argument:[/bold red] {target}. "
893
+ "Expected 'public' or 'private'."
894
+ )
895
+ return
896
+
897
+ private = target == "private"
898
+ try:
899
+ # Idempotent — create if missing so first-flip works even before any
900
+ # session has been saved yet.
901
+ await asyncio.to_thread(
902
+ api.create_repo,
903
+ repo_id=repo_id,
904
+ repo_type="dataset",
905
+ private=private,
906
+ token=token,
907
+ exist_ok=True,
908
+ )
909
+ await asyncio.to_thread(
910
+ api.update_repo_settings,
911
+ repo_id=repo_id,
912
+ repo_type="dataset",
913
+ private=private,
914
+ token=token,
915
+ )
916
+ except Exception as e:
917
+ console.print(f"[bold red]Failed to update visibility:[/bold red] {e}")
918
+ return
919
+
920
+ label = "PUBLIC" if not private else "private"
921
+ console.print(f"[green]Dataset is now {label}.[/green] {url}")
922
+
923
+
924
  async def main(model: str | None = None):
925
  """Interactive chat with the agent"""
926
 
agent/utils/terminal_display.py CHANGED
@@ -425,6 +425,7 @@ HELP_TEXT = f"""\
425
  {_I} [cyan]/effort[/cyan] [level] Reasoning effort (minimal|low|medium|high|xhigh|max|off)
426
  {_I} [cyan]/yolo[/cyan] Toggle auto-approve mode
427
  {_I} [cyan]/status[/cyan] Current model & turn count
 
428
  {_I} [cyan]/quit[/cyan] Exit"""
429
 
430
 
 
425
  {_I} [cyan]/effort[/cyan] [level] Reasoning effort (minimal|low|medium|high|xhigh|max|off)
426
  {_I} [cyan]/yolo[/cyan] Toggle auto-approve mode
427
  {_I} [cyan]/status[/cyan] Current model & turn count
428
+ {_I} [cyan]/share-traces[/cyan] [public|private] Show/flip visibility of your HF trace dataset
429
  {_I} [cyan]/quit[/cyan] Exit"""
430
 
431
 
backend/dependencies.py CHANGED
@@ -138,6 +138,38 @@ async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
138
  return user
139
 
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  async def check_org_membership(token: str, org_name: str) -> bool:
142
  """Check if the token owner belongs to an HF org. Only caches positive results."""
143
  now = time.time()
@@ -170,10 +202,10 @@ async def get_current_user(request: Request) -> dict[str, Any]:
170
  1. Authorization: Bearer <token> header
171
  2. hf_access_token cookie
172
 
173
- In dev mode (AUTH_ENABLED=False), returns a default dev user.
174
  """
175
  if not AUTH_ENABLED:
176
- return DEV_USER
177
 
178
  # Try Authorization header
179
  token = bearer_token_from_header(request.headers.get("Authorization", ""))
 
138
  return user
139
 
140
 
141
+ async def _dev_user_from_env() -> dict[str, Any]:
142
+ """Use HF_TOKEN as the dev identity when available.
143
+
144
+ Local dev often runs without OAuth, but session trace uploads still need a
145
+ real HF namespace. Deriving the dev user from HF_TOKEN keeps local uploads
146
+ pointed at the token owner's dataset instead of dev/ml-intern-sessions.
147
+ """
148
+ token = os.environ.get("HF_TOKEN", "")
149
+ if not token:
150
+ return DEV_USER
151
+
152
+ whoami = await fetch_whoami_v2(token)
153
+ if not isinstance(whoami, dict):
154
+ return DEV_USER
155
+
156
+ username = None
157
+ for key in ("name", "user", "preferred_username"):
158
+ value = whoami.get(key)
159
+ if isinstance(value, str) and value:
160
+ username = value
161
+ break
162
+ if not username:
163
+ return DEV_USER
164
+
165
+ return {
166
+ "user_id": username,
167
+ "username": username,
168
+ "authenticated": True,
169
+ "plan": await _fetch_user_plan(token),
170
+ }
171
+
172
+
173
  async def check_org_membership(token: str, org_name: str) -> bool:
174
  """Check if the token owner belongs to an HF org. Only caches positive results."""
175
  now = time.time()
 
202
  1. Authorization: Bearer <token> header
203
  2. hf_access_token cookie
204
 
205
+ In dev mode (AUTH_ENABLED=False), uses HF_TOKEN as the user when possible.
206
  """
207
  if not AUTH_ENABLED:
208
+ return await _dev_user_from_env()
209
 
210
  # Try Authorization header
211
  token = bearer_token_from_header(request.headers.get("Authorization", ""))
configs/cli_agent_config.json CHANGED
@@ -2,6 +2,8 @@
2
  "model_name": "anthropic/claude-opus-4-6",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
 
 
5
  "yolo_mode": false,
6
  "confirm_cpu_jobs": true,
7
  "auto_file_upload": true,
 
2
  "model_name": "anthropic/claude-opus-4-6",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
5
+ "share_traces": true,
6
+ "personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
7
  "yolo_mode": false,
8
  "confirm_cpu_jobs": true,
9
  "auto_file_upload": true,
configs/frontend_agent_config.json CHANGED
@@ -2,6 +2,8 @@
2
  "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
 
 
5
  "yolo_mode": false,
6
  "confirm_cpu_jobs": true,
7
  "auto_file_upload": true,
 
2
  "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
5
+ "share_traces": true,
6
+ "personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
7
  "yolo_mode": false,
8
  "confirm_cpu_jobs": true,
9
  "auto_file_upload": true,
tests/unit/test_dangling_tool_calls.py CHANGED
@@ -28,6 +28,7 @@ def _make_cm() -> ContextManager:
28
  cm.running_context_usage = 0
29
  cm.untouched_messages = 5
30
  cm.items = [Message(role="system", content="system")]
 
31
  return cm
32
 
33
 
@@ -66,6 +67,15 @@ def test_no_orphan_means_no_stub():
66
  assert tool_msgs[0].content == "ok"
67
 
68
 
 
 
 
 
 
 
 
 
 
69
  def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
70
  cm = _make_cm()
71
  cm.items.extend([
 
28
  cm.running_context_usage = 0
29
  cm.untouched_messages = 5
30
  cm.items = [Message(role="system", content="system")]
31
+ cm.on_message_added = None
32
  return cm
33
 
34
 
 
67
  assert tool_msgs[0].content == "ok"
68
 
69
 
70
+ def test_add_message_records_message_timestamp():
71
+ cm = _make_cm()
72
+ msg = Message(role="user", content="hello")
73
+
74
+ cm.add_message(msg)
75
+
76
+ assert getattr(cm.items[-1], "timestamp", None)
77
+
78
+
79
  def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
80
  cm = _make_cm()
81
  cm.items.extend([
tests/unit/test_session_uploader.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from agent.core.session_uploader import (
4
+ _PERSONAL_TOKEN_ENV,
5
+ _resolve_token,
6
+ _update_upload_status,
7
+ _upload_dataset_card,
8
+ _write_claude_code_payload,
9
+ _write_row_payload,
10
+ dataset_card_readme,
11
+ to_claude_code_jsonl,
12
+ )
13
+
14
+ HF_SECRET = "hf_" + "a" * 30
15
+ ANTHROPIC_SECRET = "sk-ant-" + "b" * 24
16
+ GITHUB_SECRET = "ghp_" + "c" * 36
17
+
18
+
19
+ def test_dataset_card_readme_has_metadata_and_public_warning():
20
+ readme = dataset_card_readme("lewtun/ml-intern-sessions")
21
+
22
+ assert readme.startswith("---\n")
23
+ assert 'pretty_name: "ML Intern Session Traces"' in readme
24
+ assert "task_categories:\n- text-generation" in readme
25
+ assert "- agent-traces" in readme
26
+ assert "- coding-agent" in readme
27
+ assert "- ml-intern" in readme
28
+ assert 'path: "sessions/**/*.jsonl"' in readme
29
+ assert "ML Intern demo: https://smolagents-ml-intern.hf.space" in readme
30
+ assert "ML Intern CLI: https://github.com/huggingface/ml-intern" in readme
31
+ assert "Repository: https://huggingface.co/datasets/" not in readme
32
+ assert (
33
+ "**WARNING: no comprehensive redaction or human review has been performed for this dataset.**"
34
+ in readme
35
+ )
36
+ assert "automated best-effort scrubbing" in readme
37
+ assert "Do not make this dataset public" in readme
38
+
39
+
40
+ def test_upload_dataset_card_only_for_claude_code_format():
41
+ class FakeApi:
42
+ def __init__(self):
43
+ self.calls = []
44
+
45
+ def upload_file(self, **kwargs):
46
+ self.calls.append(kwargs)
47
+
48
+ api = FakeApi()
49
+
50
+ _upload_dataset_card(api, "lewtun/ml-intern-sessions", "hf_token", "row")
51
+ assert api.calls == []
52
+
53
+ _upload_dataset_card(api, "lewtun/ml-intern-sessions", "hf_token", "claude_code")
54
+ assert len(api.calls) == 1
55
+ assert api.calls[0]["path_in_repo"] == "README.md"
56
+ assert api.calls[0]["repo_id"] == "lewtun/ml-intern-sessions"
57
+ assert api.calls[0]["repo_type"] == "dataset"
58
+ assert api.calls[0]["token"] == "hf_token"
59
+ assert b"no comprehensive redaction or human review" in api.calls[0]["path_or_fileobj"]
60
+
61
+
62
+ def test_personal_token_env_takes_precedence_for_hf_token(monkeypatch):
63
+ monkeypatch.setenv(_PERSONAL_TOKEN_ENV, "personal-token")
64
+ monkeypatch.setenv("HF_TOKEN", "env-token")
65
+
66
+ assert _resolve_token("HF_TOKEN") == "personal-token"
67
+
68
+
69
+ def test_update_upload_status_preserves_other_uploader_fields(tmp_path):
70
+ session_file = tmp_path / "session_123.json"
71
+ session_file.write_text(
72
+ json.dumps(
73
+ {
74
+ "session_id": "123",
75
+ "upload_status": "success",
76
+ "upload_url": "https://huggingface.co/datasets/org/sessions",
77
+ "personal_upload_status": "pending",
78
+ }
79
+ )
80
+ )
81
+
82
+ _update_upload_status(
83
+ str(session_file),
84
+ "personal_upload_status",
85
+ "personal_upload_url",
86
+ "success",
87
+ "https://huggingface.co/datasets/user/ml-intern-sessions",
88
+ )
89
+
90
+ data = json.loads(session_file.read_text())
91
+ assert data["upload_status"] == "success"
92
+ assert data["upload_url"] == "https://huggingface.co/datasets/org/sessions"
93
+ assert data["personal_upload_status"] == "success"
94
+ assert (
95
+ data["personal_upload_url"]
96
+ == "https://huggingface.co/datasets/user/ml-intern-sessions"
97
+ )
98
+
99
+
100
+ def test_claude_code_jsonl_uses_message_timestamps():
101
+ events = to_claude_code_jsonl(
102
+ {
103
+ "session_id": "session-123",
104
+ "model_name": "anthropic/claude-opus-4-6",
105
+ "session_start_time": "2026-01-01T00:00:00",
106
+ "messages": [
107
+ {
108
+ "role": "user",
109
+ "content": "hello",
110
+ "timestamp": "2026-01-01T00:00:01",
111
+ },
112
+ {
113
+ "role": "assistant",
114
+ "content": "hi",
115
+ "timestamp": "2026-01-01T00:00:02",
116
+ },
117
+ {
118
+ "role": "tool",
119
+ "tool_call_id": "call-1",
120
+ "content": "ok",
121
+ "timestamp": "2026-01-01T00:00:03",
122
+ },
123
+ ],
124
+ }
125
+ )
126
+
127
+ assert [event["timestamp"] for event in events] == [
128
+ "2026-01-01T00:00:01",
129
+ "2026-01-01T00:00:02",
130
+ "2026-01-01T00:00:03",
131
+ ]
132
+
133
+
134
+ def test_row_payload_scrubs_messages_events_and_tools(tmp_path):
135
+ tmp_file = tmp_path / "row.jsonl"
136
+ data = {
137
+ "session_id": "session-123",
138
+ "user_id": "lewtun",
139
+ "session_start_time": "2026-01-01T00:00:00",
140
+ "session_end_time": "2026-01-01T00:00:03",
141
+ "model_name": "anthropic/claude-opus-4-6",
142
+ "total_cost_usd": 0.01,
143
+ "messages": [{"role": "user", "content": f"token {HF_SECRET}"}],
144
+ "events": [{"type": "debug", "content": f"key {ANTHROPIC_SECRET}"}],
145
+ "tools": [{"name": "bash", "env": f"GITHUB_TOKEN={GITHUB_SECRET}"}],
146
+ }
147
+
148
+ _write_row_payload(data, str(tmp_file))
149
+
150
+ payload = tmp_file.read_text()
151
+ assert HF_SECRET not in payload
152
+ assert ANTHROPIC_SECRET not in payload
153
+ assert GITHUB_SECRET not in payload
154
+ assert "[REDACTED_HF_TOKEN]" in payload
155
+ assert "[REDACTED_ANTHROPIC_KEY]" in payload
156
+ assert "GITHUB_TOKEN=[REDACTED]" in payload
157
+
158
+
159
+ def test_claude_code_payload_scrubs_messages_before_conversion(tmp_path):
160
+ tmp_file = tmp_path / "claude_code.jsonl"
161
+ data = {
162
+ "session_id": "session-123",
163
+ "model_name": "anthropic/claude-opus-4-6",
164
+ "session_start_time": "2026-01-01T00:00:00",
165
+ "messages": [
166
+ {
167
+ "role": "user",
168
+ "content": f"token {HF_SECRET}",
169
+ "timestamp": "2026-01-01T00:00:01",
170
+ },
171
+ {
172
+ "role": "assistant",
173
+ "content": "running tool",
174
+ "tool_calls": [
175
+ {
176
+ "id": "call-1",
177
+ "function": {
178
+ "name": "bash",
179
+ "arguments": json.dumps({"key": ANTHROPIC_SECRET}),
180
+ },
181
+ }
182
+ ],
183
+ "timestamp": "2026-01-01T00:00:02",
184
+ },
185
+ {
186
+ "role": "tool",
187
+ "tool_call_id": "call-1",
188
+ "content": f"GITHUB_TOKEN={GITHUB_SECRET}",
189
+ "timestamp": "2026-01-01T00:00:03",
190
+ },
191
+ ],
192
+ }
193
+
194
+ _write_claude_code_payload(data, str(tmp_file))
195
+
196
+ payload = tmp_file.read_text()
197
+ assert HF_SECRET not in payload
198
+ assert ANTHROPIC_SECRET not in payload
199
+ assert GITHUB_SECRET not in payload
200
+ assert "[REDACTED_HF_TOKEN]" in payload
201
+ assert "[REDACTED_ANTHROPIC_KEY]" in payload
202
+ assert "GITHUB_TOKEN=[REDACTED]" in payload