Auto-share session traces to a private HF dataset (#193)
Browse files* feat: add share_traces toggle and per-user trace repo template
* feat: support Claude Code JSONL format and per-target auth
* feat: dual-upload sessions to private user trace dataset
* chore: retry personal trace uploads on booting
* feat: add /share-traces command to flip dataset visibility
* docs: document HF trace auto-share and /share-traces
* Use HF token owner for local dev auth
Co-authored-by: Codex <codex@openai.com>
* Rename personal session trace dataset
Co-authored-by: Codex <codex@openai.com>
* Add session dataset card metadata
Co-authored-by: Codex <codex@openai.com>
* Fix session trace upload review issues
Co-authored-by: OpenAI Codex <codex@openai.com>
* Preserve secret scrubbing before trace uploads
Co-authored-by: OpenAI Codex <codex@openai.com>
* Link ML Intern demo in dataset card
Co-authored-by: OpenAI Codex <codex@openai.com>
---------
Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>
Co-authored-by: Codex <codex@openai.com>
- README.md +35 -0
- agent/config.py +7 -0
- agent/context_manager/manager.py +8 -1
- agent/core/agent_loop.py +6 -2
- agent/core/session.py +138 -26
- agent/core/session_uploader.py +541 -116
- agent/main.py +110 -0
- agent/utils/terminal_display.py +1 -0
- backend/dependencies.py +34 -2
- configs/cli_agent_config.json +2 -0
- configs/frontend_agent_config.json +2 -0
- tests/unit/test_dangling_tool_calls.py +10 -0
- tests/unit/test_session_uploader.py +202 -0
|
@@ -56,6 +56,41 @@ ml-intern --max-iterations 100 "your prompt"
|
|
| 56 |
ml-intern --no-stream "your prompt"
|
| 57 |
```
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
## Supported Gateways
|
| 60 |
|
| 61 |
ML Intern currently supports one-way notification gateways from CLI sessions.
|
|
|
|
| 56 |
ml-intern --no-stream "your prompt"
|
| 57 |
```
|
| 58 |
|
| 59 |
+
## Sharing Traces
|
| 60 |
+
|
| 61 |
+
Every session is auto-uploaded to your **own private Hugging Face dataset**
|
| 62 |
+
in [Claude Code JSONL format](https://huggingface.co/changelog/agent-trace-viewer),
|
| 63 |
+
which the HF Agent Trace Viewer auto-detects so you can browse turns, tool
|
| 64 |
+
calls, and model responses directly on the Hub.
|
| 65 |
+
|
| 66 |
+
By default the dataset is named `{your-hf-username}/ml-intern-sessions` and is
|
| 67 |
+
**created private**. You can flip it to public from inside the CLI:
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
/share-traces # show current visibility + dataset URL
|
| 71 |
+
/share-traces public # publish (anyone can view)
|
| 72 |
+
/share-traces private # lock it back down
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
You can also flip visibility from the dataset page on huggingface.co — the
|
| 76 |
+
agent honours whatever you set there for subsequent uploads.
|
| 77 |
+
|
| 78 |
+
To opt out entirely, set in your CLI config (e.g. `configs/cli_agent_config.json`
|
| 79 |
+
or `~/.config/ml-intern/cli_agent_config.json`):
|
| 80 |
+
|
| 81 |
+
```json
|
| 82 |
+
{ "share_traces": false }
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
To override the destination repo, set:
|
| 86 |
+
|
| 87 |
+
```json
|
| 88 |
+
{ "personal_trace_repo_template": "{hf_user}/my-custom-traces" }
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
The shared `smolagents/ml-intern-sessions` dataset is unrelated and only
|
| 92 |
+
receives anonymized telemetry rows used by the backend KPI scheduler.
|
| 93 |
+
|
| 94 |
## Supported Gateways
|
| 95 |
|
| 96 |
ML Intern currently supports one-way notification gateways from CLI sessions.
|
|
@@ -27,6 +27,13 @@ class Config(BaseModel):
|
|
| 27 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 28 |
save_sessions: bool = True
|
| 29 |
session_dataset_repo: str = "smolagents/ml-intern-sessions"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
auto_save_interval: int = 1 # Save every N user turns (0 = disabled)
|
| 31 |
# Mid-turn heartbeat: save + upload every N seconds while events are being
|
| 32 |
# emitted. Guards against losing trace data on long-running turns that
|
|
|
|
| 27 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 28 |
save_sessions: bool = True
|
| 29 |
session_dataset_repo: str = "smolagents/ml-intern-sessions"
|
| 30 |
+
# Per-user private dataset that mirrors each session in Claude Code JSONL
|
| 31 |
+
# format so the HF Agent Trace Viewer auto-renders it
|
| 32 |
+
# (https://huggingface.co/changelog/agent-trace-viewer). Created private
|
| 33 |
+
# on first use; user flips it public via /share-traces. ``{hf_user}`` is
|
| 34 |
+
# substituted at upload time from ``Session.user_id``.
|
| 35 |
+
share_traces: bool = True
|
| 36 |
+
personal_trace_repo_template: str = "{hf_user}/ml-intern-sessions"
|
| 37 |
auto_save_interval: int = 1 # Save every N user turns (0 = disabled)
|
| 38 |
# Mid-turn heartbeat: save + upload every N seconds while events are being
|
| 39 |
# emitted. Guards against losing trace data on long-running turns that
|
|
@@ -240,6 +240,8 @@ class ContextManager:
|
|
| 240 |
"""Add a message to the history"""
|
| 241 |
if token_count:
|
| 242 |
self.running_context_usage = token_count
|
|
|
|
|
|
|
| 243 |
self.items.append(message)
|
| 244 |
if self.on_message_added:
|
| 245 |
self.on_message_added(message)
|
|
@@ -312,6 +314,7 @@ class ContextManager:
|
|
| 312 |
content="Tool was not executed (interrupted or error).",
|
| 313 |
tool_call_id=tc.id,
|
| 314 |
name=tc.function.name,
|
|
|
|
| 315 |
)
|
| 316 |
)
|
| 317 |
|
|
@@ -424,7 +427,11 @@ class ContextManager:
|
|
| 424 |
session=session,
|
| 425 |
kind="compaction",
|
| 426 |
)
|
| 427 |
-
summarized_message = Message(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
# Reconstruct: system + first user msg + summary + recent messages
|
| 430 |
head = [system_msg] if system_msg else []
|
|
|
|
| 240 |
"""Add a message to the history"""
|
| 241 |
if token_count:
|
| 242 |
self.running_context_usage = token_count
|
| 243 |
+
if not getattr(message, "timestamp", None):
|
| 244 |
+
message.timestamp = datetime.now().isoformat()
|
| 245 |
self.items.append(message)
|
| 246 |
if self.on_message_added:
|
| 247 |
self.on_message_added(message)
|
|
|
|
| 314 |
content="Tool was not executed (interrupted or error).",
|
| 315 |
tool_call_id=tc.id,
|
| 316 |
name=tc.function.name,
|
| 317 |
+
timestamp=datetime.now().isoformat(),
|
| 318 |
)
|
| 319 |
)
|
| 320 |
|
|
|
|
| 427 |
session=session,
|
| 428 |
kind="compaction",
|
| 429 |
)
|
| 430 |
+
summarized_message = Message(
|
| 431 |
+
role="assistant",
|
| 432 |
+
content=summary,
|
| 433 |
+
timestamp=datetime.now().isoformat(),
|
| 434 |
+
)
|
| 435 |
|
| 436 |
# Reconstruct: system + first user msg + summary + recent messages
|
| 437 |
head = [system_msg] if system_msg else []
|
|
@@ -1579,10 +1579,14 @@ async def submission_loop(
|
|
| 1579 |
session_holder[0] = session
|
| 1580 |
logger.info("Agent loop started")
|
| 1581 |
|
| 1582 |
-
# Retry any failed uploads from previous sessions (fire-and-forget)
|
|
|
|
|
|
|
| 1583 |
if config and config.save_sessions:
|
| 1584 |
Session.retry_failed_uploads_detached(
|
| 1585 |
-
directory="session_logs",
|
|
|
|
|
|
|
| 1586 |
)
|
| 1587 |
|
| 1588 |
try:
|
|
|
|
| 1579 |
session_holder[0] = session
|
| 1580 |
logger.info("Agent loop started")
|
| 1581 |
|
| 1582 |
+
# Retry any failed uploads from previous sessions (fire-and-forget).
|
| 1583 |
+
# Includes the personal trace repo when enabled so a session that failed
|
| 1584 |
+
# to publish to the user's HF dataset gets a fresh attempt on next run.
|
| 1585 |
if config and config.save_sessions:
|
| 1586 |
Session.retry_failed_uploads_detached(
|
| 1587 |
+
directory="session_logs",
|
| 1588 |
+
repo_id=config.session_dataset_repo,
|
| 1589 |
+
personal_repo_id=session._personal_trace_repo_id(),
|
| 1590 |
)
|
| 1591 |
|
| 1592 |
try:
|
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import logging
|
|
|
|
| 4 |
import subprocess
|
| 5 |
import sys
|
| 6 |
import uuid
|
|
@@ -456,62 +457,173 @@ class Session:
|
|
| 456 |
logger.error(f"Failed to update local save status: {e}")
|
| 457 |
return False
|
| 458 |
|
| 459 |
-
def
|
| 460 |
-
"""
|
| 461 |
-
Save session locally and spawn detached subprocess for upload (fire-and-forget)
|
| 462 |
-
|
| 463 |
-
Args:
|
| 464 |
-
repo_id: HuggingFace dataset repo ID
|
| 465 |
|
| 466 |
-
Returns
|
| 467 |
-
|
|
|
|
| 468 |
"""
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
if not
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
return None
|
| 473 |
|
| 474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
try:
|
| 476 |
uploader_script = Path(__file__).parent / "session_uploader.py"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
-
# Use Popen with detached process
|
| 479 |
subprocess.Popen(
|
| 480 |
-
|
| 481 |
stdin=subprocess.DEVNULL,
|
| 482 |
stdout=subprocess.DEVNULL,
|
| 483 |
stderr=subprocess.DEVNULL,
|
|
|
|
| 484 |
start_new_session=True, # Detach from parent
|
| 485 |
)
|
| 486 |
except Exception as e:
|
| 487 |
logger.warning(f"Failed to spawn upload subprocess: {e}")
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
return local_path
|
| 490 |
|
| 491 |
@staticmethod
|
| 492 |
def retry_failed_uploads_detached(
|
| 493 |
-
directory: str = "session_logs",
|
|
|
|
|
|
|
|
|
|
| 494 |
) -> None:
|
| 495 |
"""
|
| 496 |
-
Spawn detached subprocess to retry failed/pending uploads
|
|
|
|
| 497 |
|
| 498 |
Args:
|
| 499 |
directory: Directory containing session logs
|
| 500 |
-
repo_id: Target dataset repo ID
|
|
|
|
|
|
|
| 501 |
"""
|
| 502 |
-
if not repo_id:
|
| 503 |
return
|
| 504 |
|
| 505 |
try:
|
| 506 |
uploader_script = Path(__file__).parent / "session_uploader.py"
|
| 507 |
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
except Exception as e:
|
| 517 |
logger.warning(f"Failed to spawn retry subprocess: {e}")
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
+
import os
|
| 5 |
import subprocess
|
| 6 |
import sys
|
| 7 |
import uuid
|
|
|
|
| 457 |
logger.error(f"Failed to update local save status: {e}")
|
| 458 |
return False
|
| 459 |
|
| 460 |
+
def _personal_trace_repo_id(self) -> Optional[str]:
|
| 461 |
+
"""Resolve the per-user trace repo id from config + user_id.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
+
Returns ``None`` when sharing is disabled, the user is anonymous,
|
| 464 |
+
or the template is missing — caller skips the personal upload in
|
| 465 |
+
those cases.
|
| 466 |
"""
|
| 467 |
+
if not getattr(self.config, "share_traces", False):
|
| 468 |
+
return None
|
| 469 |
+
if not self.user_id:
|
| 470 |
+
return None
|
| 471 |
+
template = getattr(self.config, "personal_trace_repo_template", None)
|
| 472 |
+
if not template:
|
| 473 |
+
return None
|
| 474 |
+
try:
|
| 475 |
+
return template.format(hf_user=self.user_id)
|
| 476 |
+
except (KeyError, IndexError):
|
| 477 |
+
logger.debug("personal_trace_repo_template format failed: %r", template)
|
| 478 |
return None
|
| 479 |
|
| 480 |
+
def _spawn_uploader(
|
| 481 |
+
self,
|
| 482 |
+
action: str,
|
| 483 |
+
target: str,
|
| 484 |
+
repo_id: str,
|
| 485 |
+
*,
|
| 486 |
+
format: str,
|
| 487 |
+
token_env: Optional[str],
|
| 488 |
+
private: bool,
|
| 489 |
+
token_value: Optional[str] = None,
|
| 490 |
+
) -> None:
|
| 491 |
+
"""Fire-and-forget spawn of ``session_uploader.py`` with the given args."""
|
| 492 |
try:
|
| 493 |
uploader_script = Path(__file__).parent / "session_uploader.py"
|
| 494 |
+
cmd = [
|
| 495 |
+
sys.executable,
|
| 496 |
+
str(uploader_script),
|
| 497 |
+
action,
|
| 498 |
+
target,
|
| 499 |
+
repo_id,
|
| 500 |
+
"--format",
|
| 501 |
+
format,
|
| 502 |
+
"--private",
|
| 503 |
+
"true" if private else "false",
|
| 504 |
+
]
|
| 505 |
+
if token_env:
|
| 506 |
+
cmd.extend(["--token-env", token_env])
|
| 507 |
+
|
| 508 |
+
env = os.environ.copy()
|
| 509 |
+
if token_value:
|
| 510 |
+
env["_ML_INTERN_PERSONAL_TOKEN"] = token_value
|
| 511 |
|
|
|
|
| 512 |
subprocess.Popen(
|
| 513 |
+
cmd,
|
| 514 |
stdin=subprocess.DEVNULL,
|
| 515 |
stdout=subprocess.DEVNULL,
|
| 516 |
stderr=subprocess.DEVNULL,
|
| 517 |
+
env=env,
|
| 518 |
start_new_session=True, # Detach from parent
|
| 519 |
)
|
| 520 |
except Exception as e:
|
| 521 |
logger.warning(f"Failed to spawn upload subprocess: {e}")
|
| 522 |
|
| 523 |
+
def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
|
| 524 |
+
"""
|
| 525 |
+
Save session locally and spawn detached subprocess(es) for upload
|
| 526 |
+
(fire-and-forget).
|
| 527 |
+
|
| 528 |
+
Always uploads to the shared org dataset (``repo_id``) in the
|
| 529 |
+
single-row format used by the KPI scheduler. When
|
| 530 |
+
``config.share_traces`` is enabled and a username is known, also
|
| 531 |
+
uploads to the user's personal private dataset in Claude Code JSONL
|
| 532 |
+
format so the HF Agent Trace Viewer auto-renders it.
|
| 533 |
+
|
| 534 |
+
Args:
|
| 535 |
+
repo_id: HuggingFace dataset repo ID for the org/KPI upload.
|
| 536 |
+
|
| 537 |
+
Returns:
|
| 538 |
+
Path to local save file
|
| 539 |
+
"""
|
| 540 |
+
local_path = self.save_trajectory_local(upload_status="pending")
|
| 541 |
+
if not local_path:
|
| 542 |
+
return None
|
| 543 |
+
|
| 544 |
+
self._spawn_uploader(
|
| 545 |
+
"upload",
|
| 546 |
+
local_path,
|
| 547 |
+
repo_id,
|
| 548 |
+
format="row",
|
| 549 |
+
token_env=None, # default org token chain
|
| 550 |
+
private=False,
|
| 551 |
+
)
|
| 552 |
+
|
| 553 |
+
personal_repo = self._personal_trace_repo_id()
|
| 554 |
+
if personal_repo:
|
| 555 |
+
# User's own HF_TOKEN write-scoped to their namespace.
|
| 556 |
+
self._spawn_uploader(
|
| 557 |
+
"upload",
|
| 558 |
+
local_path,
|
| 559 |
+
personal_repo,
|
| 560 |
+
format="claude_code",
|
| 561 |
+
token_env="HF_TOKEN",
|
| 562 |
+
token_value=self.hf_token,
|
| 563 |
+
private=True,
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
return local_path
|
| 567 |
|
| 568 |
@staticmethod
|
| 569 |
def retry_failed_uploads_detached(
|
| 570 |
+
directory: str = "session_logs",
|
| 571 |
+
repo_id: Optional[str] = None,
|
| 572 |
+
*,
|
| 573 |
+
personal_repo_id: Optional[str] = None,
|
| 574 |
) -> None:
|
| 575 |
"""
|
| 576 |
+
Spawn detached subprocess(es) to retry failed/pending uploads
|
| 577 |
+
(fire-and-forget).
|
| 578 |
|
| 579 |
Args:
|
| 580 |
directory: Directory containing session logs
|
| 581 |
+
repo_id: Target dataset repo ID for the shared org/KPI upload.
|
| 582 |
+
personal_repo_id: Per-user dataset for Claude-Code-format
|
| 583 |
+
retries. ``None`` skips the personal retry pass.
|
| 584 |
"""
|
| 585 |
+
if not repo_id and not personal_repo_id:
|
| 586 |
return
|
| 587 |
|
| 588 |
try:
|
| 589 |
uploader_script = Path(__file__).parent / "session_uploader.py"
|
| 590 |
|
| 591 |
+
if repo_id:
|
| 592 |
+
subprocess.Popen(
|
| 593 |
+
[
|
| 594 |
+
sys.executable,
|
| 595 |
+
str(uploader_script),
|
| 596 |
+
"retry",
|
| 597 |
+
directory,
|
| 598 |
+
repo_id,
|
| 599 |
+
"--format",
|
| 600 |
+
"row",
|
| 601 |
+
],
|
| 602 |
+
stdin=subprocess.DEVNULL,
|
| 603 |
+
stdout=subprocess.DEVNULL,
|
| 604 |
+
stderr=subprocess.DEVNULL,
|
| 605 |
+
start_new_session=True,
|
| 606 |
+
)
|
| 607 |
+
|
| 608 |
+
if personal_repo_id:
|
| 609 |
+
subprocess.Popen(
|
| 610 |
+
[
|
| 611 |
+
sys.executable,
|
| 612 |
+
str(uploader_script),
|
| 613 |
+
"retry",
|
| 614 |
+
directory,
|
| 615 |
+
personal_repo_id,
|
| 616 |
+
"--format",
|
| 617 |
+
"claude_code",
|
| 618 |
+
"--token-env",
|
| 619 |
+
"HF_TOKEN",
|
| 620 |
+
"--private",
|
| 621 |
+
"true",
|
| 622 |
+
],
|
| 623 |
+
stdin=subprocess.DEVNULL,
|
| 624 |
+
stdout=subprocess.DEVNULL,
|
| 625 |
+
stderr=subprocess.DEVNULL,
|
| 626 |
+
start_new_session=True,
|
| 627 |
+
)
|
| 628 |
except Exception as e:
|
| 629 |
logger.warning(f"Failed to spawn retry subprocess: {e}")
|
|
@@ -3,39 +3,455 @@
|
|
| 3 |
Standalone script for uploading session trajectories to HuggingFace.
|
| 4 |
This runs as a separate process to avoid blocking the main agent.
|
| 5 |
Uses individual file uploads to avoid race conditions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
|
|
|
|
|
|
| 8 |
import json
|
| 9 |
import os
|
| 10 |
import sys
|
| 11 |
from datetime import datetime
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
-
# Token for
|
| 19 |
-
# backend/kpis_scheduler.py so one write-scoped token on the
|
| 20 |
-
# telemetry dataset. Never hardcode tokens in source.
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
or ""
|
| 26 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def upload_session_as_file(
|
| 30 |
-
session_file: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
) -> bool:
|
| 32 |
-
"""
|
| 33 |
-
Upload a single session as an individual JSONL file (no race conditions)
|
| 34 |
|
| 35 |
Args:
|
| 36 |
session_file: Path to local session JSON file
|
| 37 |
repo_id: HuggingFace dataset repo ID
|
| 38 |
max_retries: Number of retry attempts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
Returns:
|
| 41 |
True if successful, False otherwise
|
|
@@ -46,96 +462,60 @@ def upload_session_as_file(
|
|
| 46 |
print("Error: huggingface_hub library not available", file=sys.stderr)
|
| 47 |
return False
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
try:
|
| 50 |
-
|
| 51 |
-
with open(session_file, "r") as f:
|
| 52 |
-
data = json.load(f)
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
if upload_status == "success":
|
| 57 |
return True
|
| 58 |
|
| 59 |
-
|
| 60 |
-
hf_token = _SESSION_TOKEN
|
| 61 |
if not hf_token:
|
| 62 |
-
|
| 63 |
-
data["upload_status"] = "failed"
|
| 64 |
-
with open(session_file, "w") as f:
|
| 65 |
-
json.dump(data, f, indent=2)
|
| 66 |
return False
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
# before they leave the local disk. Best-effort regex-based redaction —
|
| 70 |
-
# see agent/core/redact.py for the patterns covered.
|
| 71 |
-
try:
|
| 72 |
-
from agent.core.redact import scrub # type: ignore
|
| 73 |
-
except Exception:
|
| 74 |
-
# Fallback for environments where the agent package isn't importable
|
| 75 |
-
# (shouldn't happen in our subprocess, but be defensive).
|
| 76 |
-
import importlib.util
|
| 77 |
-
_spec = importlib.util.spec_from_file_location(
|
| 78 |
-
"_redact",
|
| 79 |
-
Path(__file__).parent / "redact.py",
|
| 80 |
-
)
|
| 81 |
-
_mod = importlib.util.module_from_spec(_spec)
|
| 82 |
-
_spec.loader.exec_module(_mod) # type: ignore
|
| 83 |
-
scrub = _mod.scrub
|
| 84 |
-
scrubbed_messages = scrub(data["messages"])
|
| 85 |
-
scrubbed_events = scrub(data["events"])
|
| 86 |
-
scrubbed_tools = scrub(data.get("tools") or [])
|
| 87 |
-
|
| 88 |
-
# Prepare JSONL content (single line)
|
| 89 |
-
# Store messages/events/tools as JSON strings to avoid schema conflicts
|
| 90 |
-
# across sessions with different tool rosters.
|
| 91 |
-
session_row = {
|
| 92 |
-
"session_id": data["session_id"],
|
| 93 |
-
"user_id": data.get("user_id"),
|
| 94 |
-
"session_start_time": data["session_start_time"],
|
| 95 |
-
"session_end_time": data["session_end_time"],
|
| 96 |
-
"model_name": data["model_name"],
|
| 97 |
-
"total_cost_usd": data.get("total_cost_usd"),
|
| 98 |
-
"messages": json.dumps(scrubbed_messages),
|
| 99 |
-
"events": json.dumps(scrubbed_events),
|
| 100 |
-
"tools": json.dumps(scrubbed_tools),
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
# Create temporary JSONL file
|
| 104 |
import tempfile
|
| 105 |
|
| 106 |
with tempfile.NamedTemporaryFile(
|
| 107 |
mode="w", suffix=".jsonl", delete=False
|
| 108 |
) as tmp:
|
| 109 |
-
json.dump(session_row, tmp) # Single line JSON
|
| 110 |
tmp_path = tmp.name
|
| 111 |
|
| 112 |
try:
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
session_id = data["session_id"]
|
| 115 |
date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
|
| 116 |
"%Y-%m-%d"
|
| 117 |
)
|
| 118 |
repo_path = f"sessions/{date_str}/{session_id}.jsonl"
|
| 119 |
|
| 120 |
-
# Upload with retries
|
| 121 |
api = HfApi()
|
| 122 |
for attempt in range(max_retries):
|
| 123 |
try:
|
| 124 |
-
#
|
|
|
|
|
|
|
| 125 |
try:
|
| 126 |
api.create_repo(
|
| 127 |
repo_id=repo_id,
|
| 128 |
repo_type="dataset",
|
| 129 |
-
private=
|
| 130 |
token=hf_token,
|
| 131 |
-
exist_ok=True,
|
| 132 |
)
|
| 133 |
-
|
| 134 |
except Exception:
|
| 135 |
-
# Repo might already exist, continue
|
| 136 |
pass
|
| 137 |
|
| 138 |
-
|
|
|
|
| 139 |
api.upload_file(
|
| 140 |
path_or_fileobj=tmp_path,
|
| 141 |
path_in_repo=repo_path,
|
|
@@ -145,12 +525,13 @@ def upload_session_as_file(
|
|
| 145 |
commit_message=f"Add session {session_id}",
|
| 146 |
)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
| 154 |
return True
|
| 155 |
|
| 156 |
except Exception:
|
|
@@ -160,14 +541,12 @@ def upload_session_as_file(
|
|
| 160 |
wait_time = 2**attempt
|
| 161 |
time.sleep(wait_time)
|
| 162 |
else:
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
json.dump(data, f, indent=2)
|
| 167 |
return False
|
| 168 |
|
| 169 |
finally:
|
| 170 |
-
# Clean up temp file
|
| 171 |
try:
|
| 172 |
os.unlink(tmp_path)
|
| 173 |
except Exception:
|
|
@@ -178,56 +557,102 @@ def upload_session_as_file(
|
|
| 178 |
return False
|
| 179 |
|
| 180 |
|
| 181 |
-
def retry_failed_uploads(
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
log_dir = Path(directory)
|
| 184 |
if not log_dir.exists():
|
| 185 |
return
|
| 186 |
|
|
|
|
| 187 |
session_files = list(log_dir.glob("session_*.json"))
|
| 188 |
|
| 189 |
for filepath in session_files:
|
| 190 |
try:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
#
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
except Exception:
|
| 201 |
pass
|
| 202 |
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
if __name__ == "__main__":
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
sys.exit(0)
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
sys.exit(1)
|
|
|
|
| 3 |
Standalone script for uploading session trajectories to HuggingFace.
|
| 4 |
This runs as a separate process to avoid blocking the main agent.
|
| 5 |
Uses individual file uploads to avoid race conditions.
|
| 6 |
+
|
| 7 |
+
Two formats are supported:
|
| 8 |
+
|
| 9 |
+
* ``row`` — single-line JSONL row used by the existing org telemetry/KPI
|
| 10 |
+
pipeline (``smolagents/ml-intern-sessions``). Compatible with
|
| 11 |
+
``backend/kpis_scheduler.py``.
|
| 12 |
+
* ``claude_code`` — one event per line in the Claude Code JSONL schema,
|
| 13 |
+
auto-detected by the HF Agent Trace Viewer
|
| 14 |
+
(https://huggingface.co/changelog/agent-trace-viewer). Used for the
|
| 15 |
+
per-user private dataset (default ``{hf_user}/ml-intern-sessions``).
|
| 16 |
"""
|
| 17 |
|
| 18 |
+
import argparse
|
| 19 |
+
import hashlib
|
| 20 |
import json
|
| 21 |
import os
|
| 22 |
import sys
|
| 23 |
from datetime import datetime
|
| 24 |
from pathlib import Path
|
| 25 |
+
from typing import Any
|
| 26 |
|
| 27 |
from dotenv import load_dotenv
|
| 28 |
|
| 29 |
load_dotenv()
|
| 30 |
|
| 31 |
+
# Token resolution for the org KPI dataset. Fallback chain (least-privilege
|
| 32 |
+
# first) — matches backend/kpis_scheduler.py so one write-scoped token on the
|
| 33 |
+
# Space covers every telemetry dataset. Never hardcode tokens in source.
|
| 34 |
+
_ORG_TOKEN_FALLBACK_CHAIN = (
|
| 35 |
+
"HF_SESSION_UPLOAD_TOKEN",
|
| 36 |
+
"HF_TOKEN",
|
| 37 |
+
"HF_ADMIN_TOKEN",
|
|
|
|
| 38 |
)
|
| 39 |
+
_PERSONAL_TOKEN_ENV = "_ML_INTERN_PERSONAL_TOKEN"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _resolve_token(token_env: str | None) -> str:
|
| 43 |
+
"""Resolve an HF token from env. ``token_env`` overrides the fallback chain."""
|
| 44 |
+
if token_env == "HF_TOKEN":
|
| 45 |
+
try:
|
| 46 |
+
from agent.core.hf_tokens import resolve_hf_token
|
| 47 |
+
|
| 48 |
+
return (
|
| 49 |
+
resolve_hf_token(
|
| 50 |
+
os.environ.get(_PERSONAL_TOKEN_ENV),
|
| 51 |
+
os.environ.get("HF_TOKEN"),
|
| 52 |
+
)
|
| 53 |
+
or ""
|
| 54 |
+
)
|
| 55 |
+
except Exception:
|
| 56 |
+
token = os.environ.get(_PERSONAL_TOKEN_ENV) or os.environ.get("HF_TOKEN")
|
| 57 |
+
return token or ""
|
| 58 |
+
|
| 59 |
+
if token_env:
|
| 60 |
+
return os.environ.get(token_env, "") or ""
|
| 61 |
+
for var in _ORG_TOKEN_FALLBACK_CHAIN:
|
| 62 |
+
val = os.environ.get(var)
|
| 63 |
+
if val:
|
| 64 |
+
return val
|
| 65 |
+
return ""
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _scrub(obj: Any) -> Any:
|
| 69 |
+
"""Best-effort regex scrub for HF tokens / API keys before upload."""
|
| 70 |
+
try:
|
| 71 |
+
from agent.core.redact import scrub # type: ignore
|
| 72 |
+
except Exception:
|
| 73 |
+
# Fallback for environments where the agent package isn't importable
|
| 74 |
+
# (shouldn't happen in our subprocess, but be defensive).
|
| 75 |
+
import importlib.util
|
| 76 |
+
|
| 77 |
+
_spec = importlib.util.spec_from_file_location(
|
| 78 |
+
"_redact",
|
| 79 |
+
Path(__file__).parent / "redact.py",
|
| 80 |
+
)
|
| 81 |
+
_mod = importlib.util.module_from_spec(_spec)
|
| 82 |
+
_spec.loader.exec_module(_mod) # type: ignore
|
| 83 |
+
scrub = _mod.scrub
|
| 84 |
+
return scrub(obj)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _msg_uuid(session_id: str, role: str, idx: int) -> str:
|
| 88 |
+
"""Deterministic UUID-shaped id for a Claude Code message.
|
| 89 |
+
|
| 90 |
+
Uses sha1 of ``session_id::role::idx`` so re-uploads/heartbeats keep the
|
| 91 |
+
parent/child chain stable. Same convention as the example dataset
|
| 92 |
+
https://huggingface.co/datasets/clem/hf-coding-tools-traces.
|
| 93 |
+
"""
|
| 94 |
+
digest = hashlib.sha1(f"{session_id}::{role}::{idx}".encode("utf-8")).hexdigest()
|
| 95 |
+
# Format like a UUID for visual familiarity (32 hex chars w/ dashes).
|
| 96 |
+
return (
|
| 97 |
+
f"{digest[0:8]}-{digest[8:12]}-{digest[12:16]}-"
|
| 98 |
+
f"{digest[16:20]}-{digest[20:32]}"
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _content_to_text(content: Any) -> str:
|
| 103 |
+
"""Best-effort flatten of a litellm/openai content field to plain text."""
|
| 104 |
+
if content is None:
|
| 105 |
+
return ""
|
| 106 |
+
if isinstance(content, str):
|
| 107 |
+
return content
|
| 108 |
+
if isinstance(content, list):
|
| 109 |
+
parts: list[str] = []
|
| 110 |
+
for block in content:
|
| 111 |
+
if isinstance(block, dict):
|
| 112 |
+
text = block.get("text")
|
| 113 |
+
if isinstance(text, str):
|
| 114 |
+
parts.append(text)
|
| 115 |
+
else:
|
| 116 |
+
# Unknown content block — keep round-trippable representation.
|
| 117 |
+
parts.append(json.dumps(block, default=str))
|
| 118 |
+
else:
|
| 119 |
+
parts.append(str(block))
|
| 120 |
+
return "\n".join(parts)
|
| 121 |
+
return str(content)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _parse_tool_args(raw: Any) -> Any:
|
| 125 |
+
"""Tool call arguments arrive as a JSON-encoded string from LLMs."""
|
| 126 |
+
if isinstance(raw, dict):
|
| 127 |
+
return raw
|
| 128 |
+
if isinstance(raw, str):
|
| 129 |
+
try:
|
| 130 |
+
return json.loads(raw)
|
| 131 |
+
except (json.JSONDecodeError, TypeError):
|
| 132 |
+
return {"_raw": raw}
|
| 133 |
+
return raw
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def to_claude_code_jsonl(trajectory: dict) -> list[dict]:
|
| 137 |
+
"""Convert an internal trajectory dict to Claude Code JSONL events.
|
| 138 |
+
|
| 139 |
+
Schema reference (per the HF Agent Trace Viewer auto-detector):
|
| 140 |
+
|
| 141 |
+
{"type":"user","message":{"role":"user","content":"..."},
|
| 142 |
+
"uuid":"...","parentUuid":null,"sessionId":"...","timestamp":"..."}
|
| 143 |
+
{"type":"assistant",
|
| 144 |
+
"message":{"role":"assistant","model":"...",
|
| 145 |
+
"content":[{"type":"text","text":"..."},
|
| 146 |
+
{"type":"tool_use","id":"...","name":"...","input":{...}}]},
|
| 147 |
+
"uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
|
| 148 |
+
{"type":"user","message":{"role":"user",
|
| 149 |
+
"content":[{"type":"tool_result",
|
| 150 |
+
"tool_use_id":"...","content":"..."}]},
|
| 151 |
+
"uuid":"...","parentUuid":"<prev>","sessionId":"...","timestamp":"..."}
|
| 152 |
+
|
| 153 |
+
System messages are skipped (they're not part of the viewer schema and
|
| 154 |
+
contain large prompts that pollute the trace viewer UI).
|
| 155 |
+
"""
|
| 156 |
+
session_id = trajectory["session_id"]
|
| 157 |
+
model_name = trajectory.get("model_name") or ""
|
| 158 |
+
fallback_timestamp = (
|
| 159 |
+
trajectory.get("session_start_time") or datetime.now().isoformat()
|
| 160 |
+
)
|
| 161 |
+
messages: list[dict] = trajectory.get("messages") or []
|
| 162 |
+
|
| 163 |
+
out: list[dict] = []
|
| 164 |
+
parent_uuid: str | None = None
|
| 165 |
+
|
| 166 |
+
for idx, msg in enumerate(messages):
|
| 167 |
+
if not isinstance(msg, dict):
|
| 168 |
+
continue
|
| 169 |
+
role = msg.get("role")
|
| 170 |
+
if role == "system":
|
| 171 |
+
continue
|
| 172 |
+
timestamp = msg.get("timestamp") or fallback_timestamp
|
| 173 |
+
|
| 174 |
+
if role == "user":
|
| 175 |
+
content = _content_to_text(msg.get("content"))
|
| 176 |
+
event_uuid = _msg_uuid(session_id, "user", idx)
|
| 177 |
+
out.append(
|
| 178 |
+
{
|
| 179 |
+
"type": "user",
|
| 180 |
+
"message": {"role": "user", "content": content},
|
| 181 |
+
"uuid": event_uuid,
|
| 182 |
+
"parentUuid": parent_uuid,
|
| 183 |
+
"sessionId": session_id,
|
| 184 |
+
"timestamp": timestamp,
|
| 185 |
+
}
|
| 186 |
+
)
|
| 187 |
+
parent_uuid = event_uuid
|
| 188 |
+
|
| 189 |
+
elif role == "assistant":
|
| 190 |
+
content_text = _content_to_text(msg.get("content"))
|
| 191 |
+
content_blocks: list[dict] = []
|
| 192 |
+
if content_text:
|
| 193 |
+
content_blocks.append({"type": "text", "text": content_text})
|
| 194 |
+
for tc in msg.get("tool_calls") or []:
|
| 195 |
+
if not isinstance(tc, dict):
|
| 196 |
+
continue
|
| 197 |
+
fn = tc.get("function") or {}
|
| 198 |
+
content_blocks.append(
|
| 199 |
+
{
|
| 200 |
+
"type": "tool_use",
|
| 201 |
+
"id": tc.get("id") or "",
|
| 202 |
+
"name": fn.get("name") or "",
|
| 203 |
+
"input": _parse_tool_args(fn.get("arguments")),
|
| 204 |
+
}
|
| 205 |
+
)
|
| 206 |
+
if not content_blocks:
|
| 207 |
+
# Edge case: empty assistant turn (shouldn't normally happen,
|
| 208 |
+
# but skip rather than emit an empty content array which
|
| 209 |
+
# confuses the viewer).
|
| 210 |
+
continue
|
| 211 |
+
event_uuid = _msg_uuid(session_id, "assistant", idx)
|
| 212 |
+
out.append(
|
| 213 |
+
{
|
| 214 |
+
"type": "assistant",
|
| 215 |
+
"message": {
|
| 216 |
+
"role": "assistant",
|
| 217 |
+
"model": model_name,
|
| 218 |
+
"content": content_blocks,
|
| 219 |
+
},
|
| 220 |
+
"uuid": event_uuid,
|
| 221 |
+
"parentUuid": parent_uuid,
|
| 222 |
+
"sessionId": session_id,
|
| 223 |
+
"timestamp": timestamp,
|
| 224 |
+
}
|
| 225 |
+
)
|
| 226 |
+
parent_uuid = event_uuid
|
| 227 |
+
|
| 228 |
+
elif role == "tool":
|
| 229 |
+
tool_call_id = msg.get("tool_call_id") or ""
|
| 230 |
+
content_text = _content_to_text(msg.get("content"))
|
| 231 |
+
event_uuid = _msg_uuid(session_id, "tool", idx)
|
| 232 |
+
out.append(
|
| 233 |
+
{
|
| 234 |
+
"type": "user",
|
| 235 |
+
"message": {
|
| 236 |
+
"role": "user",
|
| 237 |
+
"content": [
|
| 238 |
+
{
|
| 239 |
+
"type": "tool_result",
|
| 240 |
+
"tool_use_id": tool_call_id,
|
| 241 |
+
"content": content_text,
|
| 242 |
+
}
|
| 243 |
+
],
|
| 244 |
+
},
|
| 245 |
+
"uuid": event_uuid,
|
| 246 |
+
"parentUuid": parent_uuid,
|
| 247 |
+
"sessionId": session_id,
|
| 248 |
+
"timestamp": timestamp,
|
| 249 |
+
}
|
| 250 |
+
)
|
| 251 |
+
parent_uuid = event_uuid
|
| 252 |
+
|
| 253 |
+
return out
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def _scrub_session_for_upload(data: dict) -> dict:
|
| 257 |
+
"""Best-effort scrub of transcript fields before any upload temp file."""
|
| 258 |
+
scrubbed = dict(data)
|
| 259 |
+
scrubbed["messages"] = _scrub(data.get("messages") or [])
|
| 260 |
+
scrubbed["events"] = _scrub(data.get("events") or [])
|
| 261 |
+
scrubbed["tools"] = _scrub(data.get("tools") or [])
|
| 262 |
+
return scrubbed
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def _write_row_payload(data: dict, tmp_path: str) -> None:
|
| 266 |
+
"""Single-row JSONL (existing format) — used by KPI scheduler."""
|
| 267 |
+
scrubbed = _scrub_session_for_upload(data)
|
| 268 |
+
session_row = {
|
| 269 |
+
"session_id": data["session_id"],
|
| 270 |
+
"user_id": data.get("user_id"),
|
| 271 |
+
"session_start_time": data["session_start_time"],
|
| 272 |
+
"session_end_time": data["session_end_time"],
|
| 273 |
+
"model_name": data["model_name"],
|
| 274 |
+
"total_cost_usd": data.get("total_cost_usd"),
|
| 275 |
+
"messages": json.dumps(scrubbed["messages"]),
|
| 276 |
+
"events": json.dumps(scrubbed["events"]),
|
| 277 |
+
"tools": json.dumps(scrubbed["tools"]),
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
with open(tmp_path, "w") as tmp:
|
| 281 |
+
json.dump(session_row, tmp)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def _write_claude_code_payload(data: dict, tmp_path: str) -> None:
|
| 285 |
+
"""Multi-line JSONL in Claude Code schema for the HF trace viewer."""
|
| 286 |
+
# Scrub before conversion so secrets never reach the upload temp file.
|
| 287 |
+
scrubbed = _scrub_session_for_upload(data)
|
| 288 |
+
events = to_claude_code_jsonl(scrubbed)
|
| 289 |
+
with open(tmp_path, "w") as tmp:
|
| 290 |
+
for event in events:
|
| 291 |
+
tmp.write(json.dumps(event))
|
| 292 |
+
tmp.write("\n")
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def _status_field(format: str) -> str:
|
| 296 |
+
"""Per-format upload status field on the local trajectory file."""
|
| 297 |
+
return "personal_upload_status" if format == "claude_code" else "upload_status"
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def _url_field(format: str) -> str:
|
| 301 |
+
return "personal_upload_url" if format == "claude_code" else "upload_url"
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def _read_session_file(session_file: str) -> dict:
|
| 305 |
+
"""Read a local session file while respecting uploader file locks."""
|
| 306 |
+
import fcntl
|
| 307 |
+
|
| 308 |
+
with open(session_file, "r") as f:
|
| 309 |
+
fcntl.flock(f, fcntl.LOCK_SH)
|
| 310 |
+
try:
|
| 311 |
+
return json.load(f)
|
| 312 |
+
finally:
|
| 313 |
+
fcntl.flock(f, fcntl.LOCK_UN)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def _update_upload_status(
|
| 317 |
+
session_file: str,
|
| 318 |
+
status_key: str,
|
| 319 |
+
url_key: str,
|
| 320 |
+
status: str,
|
| 321 |
+
dataset_url: str | None = None,
|
| 322 |
+
) -> None:
|
| 323 |
+
"""Atomically update only this uploader's status fields.
|
| 324 |
+
|
| 325 |
+
The org and personal uploaders run as separate processes against the same
|
| 326 |
+
local session JSON file. Re-read under an exclusive lock so one uploader
|
| 327 |
+
cannot clobber fields written by the other.
|
| 328 |
+
"""
|
| 329 |
+
import fcntl
|
| 330 |
+
|
| 331 |
+
with open(session_file, "r+") as f:
|
| 332 |
+
fcntl.flock(f, fcntl.LOCK_EX)
|
| 333 |
+
try:
|
| 334 |
+
data = json.load(f)
|
| 335 |
+
data[status_key] = status
|
| 336 |
+
if dataset_url is not None:
|
| 337 |
+
data[url_key] = dataset_url
|
| 338 |
+
data["last_save_time"] = datetime.now().isoformat()
|
| 339 |
+
f.seek(0)
|
| 340 |
+
json.dump(data, f, indent=2)
|
| 341 |
+
f.truncate()
|
| 342 |
+
f.flush()
|
| 343 |
+
os.fsync(f.fileno())
|
| 344 |
+
finally:
|
| 345 |
+
fcntl.flock(f, fcntl.LOCK_UN)
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def dataset_card_readme(repo_id: str) -> str:
|
| 349 |
+
"""Dataset card for personal ML Intern session trace repos."""
|
| 350 |
+
return f"""---
|
| 351 |
+
pretty_name: "ML Intern Session Traces"
|
| 352 |
+
language:
|
| 353 |
+
- en
|
| 354 |
+
license: other
|
| 355 |
+
task_categories:
|
| 356 |
+
- text-generation
|
| 357 |
+
tags:
|
| 358 |
+
- agent-traces
|
| 359 |
+
- coding-agent
|
| 360 |
+
- ml-intern
|
| 361 |
+
- session-traces
|
| 362 |
+
- claude-code
|
| 363 |
+
- hf-agent-trace-viewer
|
| 364 |
+
configs:
|
| 365 |
+
- config_name: default
|
| 366 |
+
data_files:
|
| 367 |
+
- split: train
|
| 368 |
+
path: "sessions/**/*.jsonl"
|
| 369 |
+
---
|
| 370 |
+
|
| 371 |
+
# ML Intern session traces
|
| 372 |
+
|
| 373 |
+
This dataset contains ML Intern coding agent session traces uploaded from local
|
| 374 |
+
ML Intern runs. The traces are stored as JSON Lines files under `sessions/`,
|
| 375 |
+
with one file per session.
|
| 376 |
+
|
| 377 |
+
## Links
|
| 378 |
+
|
| 379 |
+
- ML Intern demo: https://smolagents-ml-intern.hf.space
|
| 380 |
+
- ML Intern CLI: https://github.com/huggingface/ml-intern
|
| 381 |
+
|
| 382 |
+
## Data description
|
| 383 |
+
|
| 384 |
+
Each `*.jsonl` file contains a single ML Intern session converted to a
|
| 385 |
+
Claude-Code-style event stream for the Hugging Face Agent Trace Viewer. Entries
|
| 386 |
+
can include user messages, assistant messages, tool calls, tool results, model
|
| 387 |
+
metadata, and timestamps.
|
| 388 |
+
|
| 389 |
+
Session files are written to paths of the form:
|
| 390 |
+
|
| 391 |
+
```text
|
| 392 |
+
sessions/YYYY-MM-DD/<session_id>.jsonl
|
| 393 |
+
```
|
| 394 |
+
|
| 395 |
+
## Redaction and review
|
| 396 |
+
|
| 397 |
+
**WARNING: no comprehensive redaction or human review has been performed for this dataset.**
|
| 398 |
+
|
| 399 |
+
ML Intern applies automated best-effort scrubbing for common secret patterns
|
| 400 |
+
such as Hugging Face, Anthropic, OpenAI, GitHub, and AWS tokens before upload.
|
| 401 |
+
This is not a privacy guarantee.
|
| 402 |
+
|
| 403 |
+
These traces may contain sensitive information, including prompts, code,
|
| 404 |
+
terminal output, file paths, repository names, private task context, tool
|
| 405 |
+
outputs, or other data from the local development environment. Treat every
|
| 406 |
+
session as potentially sensitive.
|
| 407 |
+
|
| 408 |
+
Do not make this dataset public unless you have manually inspected the uploaded
|
| 409 |
+
sessions and are comfortable sharing their full contents.
|
| 410 |
+
|
| 411 |
+
## Limitations
|
| 412 |
+
|
| 413 |
+
Coding agent transcripts can include private or off-topic content, failed
|
| 414 |
+
experiments, credentials accidentally pasted by a user, and outputs copied from
|
| 415 |
+
local files or services. Use with appropriate caution, especially before
|
| 416 |
+
changing repository visibility.
|
| 417 |
+
"""
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def _upload_dataset_card(api: Any, repo_id: str, token: str, format: str) -> None:
|
| 421 |
+
"""Create/update a README for personal trace datasets."""
|
| 422 |
+
if format != "claude_code":
|
| 423 |
+
return
|
| 424 |
+
|
| 425 |
+
api.upload_file(
|
| 426 |
+
path_or_fileobj=dataset_card_readme(repo_id).encode("utf-8"),
|
| 427 |
+
path_in_repo="README.md",
|
| 428 |
+
repo_id=repo_id,
|
| 429 |
+
repo_type="dataset",
|
| 430 |
+
token=token,
|
| 431 |
+
commit_message="Update dataset card",
|
| 432 |
+
)
|
| 433 |
|
| 434 |
|
| 435 |
def upload_session_as_file(
|
| 436 |
+
session_file: str,
|
| 437 |
+
repo_id: str,
|
| 438 |
+
max_retries: int = 3,
|
| 439 |
+
format: str = "row",
|
| 440 |
+
token_env: str | None = None,
|
| 441 |
+
private: bool = False,
|
| 442 |
) -> bool:
|
| 443 |
+
"""Upload a single session as an individual JSONL file (no race conditions).
|
|
|
|
| 444 |
|
| 445 |
Args:
|
| 446 |
session_file: Path to local session JSON file
|
| 447 |
repo_id: HuggingFace dataset repo ID
|
| 448 |
max_retries: Number of retry attempts
|
| 449 |
+
format: ``row`` (default, KPI-compatible) or ``claude_code`` (HF
|
| 450 |
+
Agent Trace Viewer compatible).
|
| 451 |
+
token_env: Name of the env var holding the HF token. ``None`` falls
|
| 452 |
+
back to the org-token chain (``HF_SESSION_UPLOAD_TOKEN`` →
|
| 453 |
+
``HF_TOKEN`` → ``HF_ADMIN_TOKEN``).
|
| 454 |
+
private: When creating the repo for the first time, mark it private.
|
| 455 |
|
| 456 |
Returns:
|
| 457 |
True if successful, False otherwise
|
|
|
|
| 462 |
print("Error: huggingface_hub library not available", file=sys.stderr)
|
| 463 |
return False
|
| 464 |
|
| 465 |
+
status_key = _status_field(format)
|
| 466 |
+
url_key = _url_field(format)
|
| 467 |
+
|
| 468 |
try:
|
| 469 |
+
data = _read_session_file(session_file)
|
|
|
|
|
|
|
| 470 |
|
| 471 |
+
# Skip if already uploaded for this format.
|
| 472 |
+
if data.get(status_key) == "success":
|
|
|
|
| 473 |
return True
|
| 474 |
|
| 475 |
+
hf_token = _resolve_token(token_env)
|
|
|
|
| 476 |
if not hf_token:
|
| 477 |
+
_update_upload_status(session_file, status_key, url_key, "failed")
|
|
|
|
|
|
|
|
|
|
| 478 |
return False
|
| 479 |
|
| 480 |
+
# Build temp upload payload in the requested format.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
import tempfile
|
| 482 |
|
| 483 |
with tempfile.NamedTemporaryFile(
|
| 484 |
mode="w", suffix=".jsonl", delete=False
|
| 485 |
) as tmp:
|
|
|
|
| 486 |
tmp_path = tmp.name
|
| 487 |
|
| 488 |
try:
|
| 489 |
+
if format == "claude_code":
|
| 490 |
+
_write_claude_code_payload(data, tmp_path)
|
| 491 |
+
else:
|
| 492 |
+
_write_row_payload(data, tmp_path)
|
| 493 |
+
|
| 494 |
session_id = data["session_id"]
|
| 495 |
date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
|
| 496 |
"%Y-%m-%d"
|
| 497 |
)
|
| 498 |
repo_path = f"sessions/{date_str}/{session_id}.jsonl"
|
| 499 |
|
|
|
|
| 500 |
api = HfApi()
|
| 501 |
for attempt in range(max_retries):
|
| 502 |
try:
|
| 503 |
+
# Idempotent create — visibility is set on first creation
|
| 504 |
+
# only. Existing repos keep whatever the user picked via
|
| 505 |
+
# /share-traces.
|
| 506 |
try:
|
| 507 |
api.create_repo(
|
| 508 |
repo_id=repo_id,
|
| 509 |
repo_type="dataset",
|
| 510 |
+
private=private,
|
| 511 |
token=hf_token,
|
| 512 |
+
exist_ok=True,
|
| 513 |
)
|
|
|
|
| 514 |
except Exception:
|
|
|
|
| 515 |
pass
|
| 516 |
|
| 517 |
+
_upload_dataset_card(api, repo_id, hf_token, format)
|
| 518 |
+
|
| 519 |
api.upload_file(
|
| 520 |
path_or_fileobj=tmp_path,
|
| 521 |
path_in_repo=repo_path,
|
|
|
|
| 525 |
commit_message=f"Add session {session_id}",
|
| 526 |
)
|
| 527 |
|
| 528 |
+
_update_upload_status(
|
| 529 |
+
session_file,
|
| 530 |
+
status_key,
|
| 531 |
+
url_key,
|
| 532 |
+
"success",
|
| 533 |
+
f"https://huggingface.co/datasets/{repo_id}",
|
| 534 |
+
)
|
| 535 |
return True
|
| 536 |
|
| 537 |
except Exception:
|
|
|
|
| 541 |
wait_time = 2**attempt
|
| 542 |
time.sleep(wait_time)
|
| 543 |
else:
|
| 544 |
+
_update_upload_status(
|
| 545 |
+
session_file, status_key, url_key, "failed"
|
| 546 |
+
)
|
|
|
|
| 547 |
return False
|
| 548 |
|
| 549 |
finally:
|
|
|
|
| 550 |
try:
|
| 551 |
os.unlink(tmp_path)
|
| 552 |
except Exception:
|
|
|
|
| 557 |
return False
|
| 558 |
|
| 559 |
|
| 560 |
+
def retry_failed_uploads(
|
| 561 |
+
directory: str,
|
| 562 |
+
repo_id: str,
|
| 563 |
+
format: str = "row",
|
| 564 |
+
token_env: str | None = None,
|
| 565 |
+
private: bool = False,
|
| 566 |
+
):
|
| 567 |
+
"""Retry all failed/pending uploads in a directory for the given format."""
|
| 568 |
log_dir = Path(directory)
|
| 569 |
if not log_dir.exists():
|
| 570 |
return
|
| 571 |
|
| 572 |
+
status_key = _status_field(format)
|
| 573 |
session_files = list(log_dir.glob("session_*.json"))
|
| 574 |
|
| 575 |
for filepath in session_files:
|
| 576 |
try:
|
| 577 |
+
data = _read_session_file(str(filepath))
|
| 578 |
+
|
| 579 |
+
# Only retry pending or failed uploads. Files predating this
|
| 580 |
+
# field don't have it; treat unknown as "not yet attempted" for
|
| 581 |
+
# the row format (legacy behavior) and "skip" for claude_code
|
| 582 |
+
# so we don't suddenly re-upload pre-existing sessions to a
|
| 583 |
+
# newly-introduced personal repo.
|
| 584 |
+
status = data.get(status_key, "unknown")
|
| 585 |
+
if format == "claude_code" and status_key not in data:
|
| 586 |
+
continue
|
| 587 |
+
|
| 588 |
+
if status in ("pending", "failed", "unknown"):
|
| 589 |
+
upload_session_as_file(
|
| 590 |
+
str(filepath),
|
| 591 |
+
repo_id,
|
| 592 |
+
format=format,
|
| 593 |
+
token_env=token_env,
|
| 594 |
+
private=private,
|
| 595 |
+
)
|
| 596 |
|
| 597 |
except Exception:
|
| 598 |
pass
|
| 599 |
|
| 600 |
|
| 601 |
+
def _str2bool(v: str) -> bool:
|
| 602 |
+
return str(v).strip().lower() in {"1", "true", "yes", "on"}
|
| 603 |
+
|
| 604 |
+
|
| 605 |
if __name__ == "__main__":
|
| 606 |
+
parser = argparse.ArgumentParser(prog="session_uploader.py")
|
| 607 |
+
sub = parser.add_subparsers(dest="command", required=True)
|
| 608 |
+
|
| 609 |
+
p_upload = sub.add_parser("upload")
|
| 610 |
+
p_upload.add_argument("session_file")
|
| 611 |
+
p_upload.add_argument("repo_id")
|
| 612 |
+
p_upload.add_argument(
|
| 613 |
+
"--format",
|
| 614 |
+
choices=["row", "claude_code"],
|
| 615 |
+
default="row",
|
| 616 |
+
)
|
| 617 |
+
p_upload.add_argument(
|
| 618 |
+
"--token-env",
|
| 619 |
+
default=None,
|
| 620 |
+
help="Env var name holding the HF token (default: org fallback chain).",
|
| 621 |
+
)
|
| 622 |
+
p_upload.add_argument("--private", default="false")
|
| 623 |
+
|
| 624 |
+
p_retry = sub.add_parser("retry")
|
| 625 |
+
p_retry.add_argument("directory")
|
| 626 |
+
p_retry.add_argument("repo_id")
|
| 627 |
+
p_retry.add_argument(
|
| 628 |
+
"--format",
|
| 629 |
+
choices=["row", "claude_code"],
|
| 630 |
+
default="row",
|
| 631 |
+
)
|
| 632 |
+
p_retry.add_argument("--token-env", default=None)
|
| 633 |
+
p_retry.add_argument("--private", default="false")
|
| 634 |
+
|
| 635 |
+
args = parser.parse_args()
|
| 636 |
+
|
| 637 |
+
if args.command == "upload":
|
| 638 |
+
ok = upload_session_as_file(
|
| 639 |
+
args.session_file,
|
| 640 |
+
args.repo_id,
|
| 641 |
+
format=args.format,
|
| 642 |
+
token_env=args.token_env,
|
| 643 |
+
private=_str2bool(args.private),
|
| 644 |
+
)
|
| 645 |
+
sys.exit(0 if ok else 1)
|
| 646 |
+
|
| 647 |
+
if args.command == "retry":
|
| 648 |
+
retry_failed_uploads(
|
| 649 |
+
args.directory,
|
| 650 |
+
args.repo_id,
|
| 651 |
+
format=args.format,
|
| 652 |
+
token_env=args.token_env,
|
| 653 |
+
private=_str2bool(args.private),
|
| 654 |
+
)
|
| 655 |
sys.exit(0)
|
| 656 |
|
| 657 |
+
parser.print_help()
|
| 658 |
+
sys.exit(1)
|
|
|
|
@@ -807,10 +807,120 @@ async def _handle_slash_command(
|
|
| 807 |
print(f"Context items: {len(session.context_manager.items)}")
|
| 808 |
return None
|
| 809 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
print(f"Unknown command: {command}. Type /help for available commands.")
|
| 811 |
return None
|
| 812 |
|
| 813 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 814 |
async def main(model: str | None = None):
|
| 815 |
"""Interactive chat with the agent"""
|
| 816 |
|
|
|
|
| 807 |
print(f"Context items: {len(session.context_manager.items)}")
|
| 808 |
return None
|
| 809 |
|
| 810 |
+
if command == "/share-traces":
|
| 811 |
+
session = session_holder[0] if session_holder else None
|
| 812 |
+
await _handle_share_traces_command(arg, config, session)
|
| 813 |
+
return None
|
| 814 |
+
|
| 815 |
print(f"Unknown command: {command}. Type /help for available commands.")
|
| 816 |
return None
|
| 817 |
|
| 818 |
|
| 819 |
+
async def _handle_share_traces_command(arg: str, config, session) -> None:
|
| 820 |
+
"""Show or flip visibility of the user's personal trace dataset.
|
| 821 |
+
|
| 822 |
+
Uses the user's own HF_TOKEN (write-scoped to their namespace). Only
|
| 823 |
+
operates on the personal trace repo configured via
|
| 824 |
+
``personal_trace_repo_template`` — never touches the shared org dataset.
|
| 825 |
+
"""
|
| 826 |
+
from huggingface_hub import HfApi
|
| 827 |
+
from huggingface_hub.utils import HfHubHTTPError
|
| 828 |
+
|
| 829 |
+
console = get_console()
|
| 830 |
+
if session is None:
|
| 831 |
+
console.print("[bold red]No active session.[/bold red]")
|
| 832 |
+
return
|
| 833 |
+
|
| 834 |
+
repo_id = session._personal_trace_repo_id() if session is not None else None
|
| 835 |
+
if not repo_id:
|
| 836 |
+
if not getattr(config, "share_traces", False):
|
| 837 |
+
console.print(
|
| 838 |
+
"[yellow]share_traces is disabled in config. "
|
| 839 |
+
"Set it to true to publish per-session traces to your HF dataset."
|
| 840 |
+
"[/yellow]"
|
| 841 |
+
)
|
| 842 |
+
return
|
| 843 |
+
if not session.user_id:
|
| 844 |
+
console.print(
|
| 845 |
+
"[yellow]No HF username resolved \u2014 cannot pick a personal "
|
| 846 |
+
"trace repo. Set HF_TOKEN to a token tied to your account.[/yellow]"
|
| 847 |
+
)
|
| 848 |
+
return
|
| 849 |
+
console.print(
|
| 850 |
+
"[yellow]personal_trace_repo_template is unset \u2014 nothing to do.[/yellow]"
|
| 851 |
+
)
|
| 852 |
+
return
|
| 853 |
+
|
| 854 |
+
token = session.hf_token or resolve_hf_token()
|
| 855 |
+
if not token:
|
| 856 |
+
console.print(
|
| 857 |
+
"[bold red]No HF_TOKEN available.[/bold red] Cannot read or change "
|
| 858 |
+
"dataset visibility."
|
| 859 |
+
)
|
| 860 |
+
return
|
| 861 |
+
|
| 862 |
+
api = HfApi(token=token)
|
| 863 |
+
url = f"https://huggingface.co/datasets/{repo_id}"
|
| 864 |
+
target = arg.strip().lower()
|
| 865 |
+
|
| 866 |
+
if not target:
|
| 867 |
+
try:
|
| 868 |
+
info = await asyncio.to_thread(
|
| 869 |
+
api.repo_info, repo_id=repo_id, repo_type="dataset"
|
| 870 |
+
)
|
| 871 |
+
visibility = "private" if getattr(info, "private", False) else "public"
|
| 872 |
+
console.print(f"[bold]Trace dataset:[/bold] {url}")
|
| 873 |
+
console.print(f"[bold]Visibility:[/bold] {visibility}")
|
| 874 |
+
console.print(
|
| 875 |
+
"[dim]Use '/share-traces public' to publish, "
|
| 876 |
+
"'/share-traces private' to lock it back down.[/dim]"
|
| 877 |
+
)
|
| 878 |
+
except HfHubHTTPError as e:
|
| 879 |
+
if getattr(e.response, "status_code", None) == 404:
|
| 880 |
+
console.print(
|
| 881 |
+
f"[dim]Dataset {repo_id} doesn't exist yet \u2014 it'll be "
|
| 882 |
+
"created (private) on the next session save.[/dim]"
|
| 883 |
+
)
|
| 884 |
+
else:
|
| 885 |
+
console.print(f"[bold red]Hub error:[/bold red] {e}")
|
| 886 |
+
except Exception as e:
|
| 887 |
+
console.print(f"[bold red]Could not fetch dataset info:[/bold red] {e}")
|
| 888 |
+
return
|
| 889 |
+
|
| 890 |
+
if target not in {"public", "private"}:
|
| 891 |
+
console.print(
|
| 892 |
+
f"[bold red]Unknown argument:[/bold red] {target}. "
|
| 893 |
+
"Expected 'public' or 'private'."
|
| 894 |
+
)
|
| 895 |
+
return
|
| 896 |
+
|
| 897 |
+
private = target == "private"
|
| 898 |
+
try:
|
| 899 |
+
# Idempotent — create if missing so first-flip works even before any
|
| 900 |
+
# session has been saved yet.
|
| 901 |
+
await asyncio.to_thread(
|
| 902 |
+
api.create_repo,
|
| 903 |
+
repo_id=repo_id,
|
| 904 |
+
repo_type="dataset",
|
| 905 |
+
private=private,
|
| 906 |
+
token=token,
|
| 907 |
+
exist_ok=True,
|
| 908 |
+
)
|
| 909 |
+
await asyncio.to_thread(
|
| 910 |
+
api.update_repo_settings,
|
| 911 |
+
repo_id=repo_id,
|
| 912 |
+
repo_type="dataset",
|
| 913 |
+
private=private,
|
| 914 |
+
token=token,
|
| 915 |
+
)
|
| 916 |
+
except Exception as e:
|
| 917 |
+
console.print(f"[bold red]Failed to update visibility:[/bold red] {e}")
|
| 918 |
+
return
|
| 919 |
+
|
| 920 |
+
label = "PUBLIC" if not private else "private"
|
| 921 |
+
console.print(f"[green]Dataset is now {label}.[/green] {url}")
|
| 922 |
+
|
| 923 |
+
|
| 924 |
async def main(model: str | None = None):
|
| 925 |
"""Interactive chat with the agent"""
|
| 926 |
|
|
@@ -425,6 +425,7 @@ HELP_TEXT = f"""\
|
|
| 425 |
{_I} [cyan]/effort[/cyan] [level] Reasoning effort (minimal|low|medium|high|xhigh|max|off)
|
| 426 |
{_I} [cyan]/yolo[/cyan] Toggle auto-approve mode
|
| 427 |
{_I} [cyan]/status[/cyan] Current model & turn count
|
|
|
|
| 428 |
{_I} [cyan]/quit[/cyan] Exit"""
|
| 429 |
|
| 430 |
|
|
|
|
| 425 |
{_I} [cyan]/effort[/cyan] [level] Reasoning effort (minimal|low|medium|high|xhigh|max|off)
|
| 426 |
{_I} [cyan]/yolo[/cyan] Toggle auto-approve mode
|
| 427 |
{_I} [cyan]/status[/cyan] Current model & turn count
|
| 428 |
+
{_I} [cyan]/share-traces[/cyan] [public|private] Show/flip visibility of your HF trace dataset
|
| 429 |
{_I} [cyan]/quit[/cyan] Exit"""
|
| 430 |
|
| 431 |
|
|
@@ -138,6 +138,38 @@ async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
|
|
| 138 |
return user
|
| 139 |
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
async def check_org_membership(token: str, org_name: str) -> bool:
|
| 142 |
"""Check if the token owner belongs to an HF org. Only caches positive results."""
|
| 143 |
now = time.time()
|
|
@@ -170,10 +202,10 @@ async def get_current_user(request: Request) -> dict[str, Any]:
|
|
| 170 |
1. Authorization: Bearer <token> header
|
| 171 |
2. hf_access_token cookie
|
| 172 |
|
| 173 |
-
In dev mode (AUTH_ENABLED=False),
|
| 174 |
"""
|
| 175 |
if not AUTH_ENABLED:
|
| 176 |
-
return
|
| 177 |
|
| 178 |
# Try Authorization header
|
| 179 |
token = bearer_token_from_header(request.headers.get("Authorization", ""))
|
|
|
|
| 138 |
return user
|
| 139 |
|
| 140 |
|
| 141 |
+
async def _dev_user_from_env() -> dict[str, Any]:
|
| 142 |
+
"""Use HF_TOKEN as the dev identity when available.
|
| 143 |
+
|
| 144 |
+
Local dev often runs without OAuth, but session trace uploads still need a
|
| 145 |
+
real HF namespace. Deriving the dev user from HF_TOKEN keeps local uploads
|
| 146 |
+
pointed at the token owner's dataset instead of dev/ml-intern-sessions.
|
| 147 |
+
"""
|
| 148 |
+
token = os.environ.get("HF_TOKEN", "")
|
| 149 |
+
if not token:
|
| 150 |
+
return DEV_USER
|
| 151 |
+
|
| 152 |
+
whoami = await fetch_whoami_v2(token)
|
| 153 |
+
if not isinstance(whoami, dict):
|
| 154 |
+
return DEV_USER
|
| 155 |
+
|
| 156 |
+
username = None
|
| 157 |
+
for key in ("name", "user", "preferred_username"):
|
| 158 |
+
value = whoami.get(key)
|
| 159 |
+
if isinstance(value, str) and value:
|
| 160 |
+
username = value
|
| 161 |
+
break
|
| 162 |
+
if not username:
|
| 163 |
+
return DEV_USER
|
| 164 |
+
|
| 165 |
+
return {
|
| 166 |
+
"user_id": username,
|
| 167 |
+
"username": username,
|
| 168 |
+
"authenticated": True,
|
| 169 |
+
"plan": await _fetch_user_plan(token),
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
|
| 173 |
async def check_org_membership(token: str, org_name: str) -> bool:
|
| 174 |
"""Check if the token owner belongs to an HF org. Only caches positive results."""
|
| 175 |
now = time.time()
|
|
|
|
| 202 |
1. Authorization: Bearer <token> header
|
| 203 |
2. hf_access_token cookie
|
| 204 |
|
| 205 |
+
In dev mode (AUTH_ENABLED=False), uses HF_TOKEN as the user when possible.
|
| 206 |
"""
|
| 207 |
if not AUTH_ENABLED:
|
| 208 |
+
return await _dev_user_from_env()
|
| 209 |
|
| 210 |
# Try Authorization header
|
| 211 |
token = bearer_token_from_header(request.headers.get("Authorization", ""))
|
|
@@ -2,6 +2,8 @@
|
|
| 2 |
"model_name": "anthropic/claude-opus-4-6",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
|
|
|
|
|
|
| 5 |
"yolo_mode": false,
|
| 6 |
"confirm_cpu_jobs": true,
|
| 7 |
"auto_file_upload": true,
|
|
|
|
| 2 |
"model_name": "anthropic/claude-opus-4-6",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
| 5 |
+
"share_traces": true,
|
| 6 |
+
"personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
|
| 7 |
"yolo_mode": false,
|
| 8 |
"confirm_cpu_jobs": true,
|
| 9 |
"auto_file_upload": true,
|
|
@@ -2,6 +2,8 @@
|
|
| 2 |
"model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
|
|
|
|
|
|
| 5 |
"yolo_mode": false,
|
| 6 |
"confirm_cpu_jobs": true,
|
| 7 |
"auto_file_upload": true,
|
|
|
|
| 2 |
"model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
| 5 |
+
"share_traces": true,
|
| 6 |
+
"personal_trace_repo_template": "{hf_user}/ml-intern-sessions",
|
| 7 |
"yolo_mode": false,
|
| 8 |
"confirm_cpu_jobs": true,
|
| 9 |
"auto_file_upload": true,
|
|
@@ -28,6 +28,7 @@ def _make_cm() -> ContextManager:
|
|
| 28 |
cm.running_context_usage = 0
|
| 29 |
cm.untouched_messages = 5
|
| 30 |
cm.items = [Message(role="system", content="system")]
|
|
|
|
| 31 |
return cm
|
| 32 |
|
| 33 |
|
|
@@ -66,6 +67,15 @@ def test_no_orphan_means_no_stub():
|
|
| 66 |
assert tool_msgs[0].content == "ok"
|
| 67 |
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
|
| 70 |
cm = _make_cm()
|
| 71 |
cm.items.extend([
|
|
|
|
| 28 |
cm.running_context_usage = 0
|
| 29 |
cm.untouched_messages = 5
|
| 30 |
cm.items = [Message(role="system", content="system")]
|
| 31 |
+
cm.on_message_added = None
|
| 32 |
return cm
|
| 33 |
|
| 34 |
|
|
|
|
| 67 |
assert tool_msgs[0].content == "ok"
|
| 68 |
|
| 69 |
|
| 70 |
+
def test_add_message_records_message_timestamp():
|
| 71 |
+
cm = _make_cm()
|
| 72 |
+
msg = Message(role="user", content="hello")
|
| 73 |
+
|
| 74 |
+
cm.add_message(msg)
|
| 75 |
+
|
| 76 |
+
assert getattr(cm.items[-1], "timestamp", None)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
def test_multiple_dangling_tool_calls_in_one_assistant_message_are_all_patched():
|
| 80 |
cm = _make_cm()
|
| 81 |
cm.items.extend([
|
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from agent.core.session_uploader import (
|
| 4 |
+
_PERSONAL_TOKEN_ENV,
|
| 5 |
+
_resolve_token,
|
| 6 |
+
_update_upload_status,
|
| 7 |
+
_upload_dataset_card,
|
| 8 |
+
_write_claude_code_payload,
|
| 9 |
+
_write_row_payload,
|
| 10 |
+
dataset_card_readme,
|
| 11 |
+
to_claude_code_jsonl,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
HF_SECRET = "hf_" + "a" * 30
|
| 15 |
+
ANTHROPIC_SECRET = "sk-ant-" + "b" * 24
|
| 16 |
+
GITHUB_SECRET = "ghp_" + "c" * 36
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_dataset_card_readme_has_metadata_and_public_warning():
|
| 20 |
+
readme = dataset_card_readme("lewtun/ml-intern-sessions")
|
| 21 |
+
|
| 22 |
+
assert readme.startswith("---\n")
|
| 23 |
+
assert 'pretty_name: "ML Intern Session Traces"' in readme
|
| 24 |
+
assert "task_categories:\n- text-generation" in readme
|
| 25 |
+
assert "- agent-traces" in readme
|
| 26 |
+
assert "- coding-agent" in readme
|
| 27 |
+
assert "- ml-intern" in readme
|
| 28 |
+
assert 'path: "sessions/**/*.jsonl"' in readme
|
| 29 |
+
assert "ML Intern demo: https://smolagents-ml-intern.hf.space" in readme
|
| 30 |
+
assert "ML Intern CLI: https://github.com/huggingface/ml-intern" in readme
|
| 31 |
+
assert "Repository: https://huggingface.co/datasets/" not in readme
|
| 32 |
+
assert (
|
| 33 |
+
"**WARNING: no comprehensive redaction or human review has been performed for this dataset.**"
|
| 34 |
+
in readme
|
| 35 |
+
)
|
| 36 |
+
assert "automated best-effort scrubbing" in readme
|
| 37 |
+
assert "Do not make this dataset public" in readme
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_upload_dataset_card_only_for_claude_code_format():
|
| 41 |
+
class FakeApi:
|
| 42 |
+
def __init__(self):
|
| 43 |
+
self.calls = []
|
| 44 |
+
|
| 45 |
+
def upload_file(self, **kwargs):
|
| 46 |
+
self.calls.append(kwargs)
|
| 47 |
+
|
| 48 |
+
api = FakeApi()
|
| 49 |
+
|
| 50 |
+
_upload_dataset_card(api, "lewtun/ml-intern-sessions", "hf_token", "row")
|
| 51 |
+
assert api.calls == []
|
| 52 |
+
|
| 53 |
+
_upload_dataset_card(api, "lewtun/ml-intern-sessions", "hf_token", "claude_code")
|
| 54 |
+
assert len(api.calls) == 1
|
| 55 |
+
assert api.calls[0]["path_in_repo"] == "README.md"
|
| 56 |
+
assert api.calls[0]["repo_id"] == "lewtun/ml-intern-sessions"
|
| 57 |
+
assert api.calls[0]["repo_type"] == "dataset"
|
| 58 |
+
assert api.calls[0]["token"] == "hf_token"
|
| 59 |
+
assert b"no comprehensive redaction or human review" in api.calls[0]["path_or_fileobj"]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def test_personal_token_env_takes_precedence_for_hf_token(monkeypatch):
|
| 63 |
+
monkeypatch.setenv(_PERSONAL_TOKEN_ENV, "personal-token")
|
| 64 |
+
monkeypatch.setenv("HF_TOKEN", "env-token")
|
| 65 |
+
|
| 66 |
+
assert _resolve_token("HF_TOKEN") == "personal-token"
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def test_update_upload_status_preserves_other_uploader_fields(tmp_path):
|
| 70 |
+
session_file = tmp_path / "session_123.json"
|
| 71 |
+
session_file.write_text(
|
| 72 |
+
json.dumps(
|
| 73 |
+
{
|
| 74 |
+
"session_id": "123",
|
| 75 |
+
"upload_status": "success",
|
| 76 |
+
"upload_url": "https://huggingface.co/datasets/org/sessions",
|
| 77 |
+
"personal_upload_status": "pending",
|
| 78 |
+
}
|
| 79 |
+
)
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
_update_upload_status(
|
| 83 |
+
str(session_file),
|
| 84 |
+
"personal_upload_status",
|
| 85 |
+
"personal_upload_url",
|
| 86 |
+
"success",
|
| 87 |
+
"https://huggingface.co/datasets/user/ml-intern-sessions",
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
data = json.loads(session_file.read_text())
|
| 91 |
+
assert data["upload_status"] == "success"
|
| 92 |
+
assert data["upload_url"] == "https://huggingface.co/datasets/org/sessions"
|
| 93 |
+
assert data["personal_upload_status"] == "success"
|
| 94 |
+
assert (
|
| 95 |
+
data["personal_upload_url"]
|
| 96 |
+
== "https://huggingface.co/datasets/user/ml-intern-sessions"
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def test_claude_code_jsonl_uses_message_timestamps():
|
| 101 |
+
events = to_claude_code_jsonl(
|
| 102 |
+
{
|
| 103 |
+
"session_id": "session-123",
|
| 104 |
+
"model_name": "anthropic/claude-opus-4-6",
|
| 105 |
+
"session_start_time": "2026-01-01T00:00:00",
|
| 106 |
+
"messages": [
|
| 107 |
+
{
|
| 108 |
+
"role": "user",
|
| 109 |
+
"content": "hello",
|
| 110 |
+
"timestamp": "2026-01-01T00:00:01",
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"role": "assistant",
|
| 114 |
+
"content": "hi",
|
| 115 |
+
"timestamp": "2026-01-01T00:00:02",
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"role": "tool",
|
| 119 |
+
"tool_call_id": "call-1",
|
| 120 |
+
"content": "ok",
|
| 121 |
+
"timestamp": "2026-01-01T00:00:03",
|
| 122 |
+
},
|
| 123 |
+
],
|
| 124 |
+
}
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
assert [event["timestamp"] for event in events] == [
|
| 128 |
+
"2026-01-01T00:00:01",
|
| 129 |
+
"2026-01-01T00:00:02",
|
| 130 |
+
"2026-01-01T00:00:03",
|
| 131 |
+
]
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def test_row_payload_scrubs_messages_events_and_tools(tmp_path):
|
| 135 |
+
tmp_file = tmp_path / "row.jsonl"
|
| 136 |
+
data = {
|
| 137 |
+
"session_id": "session-123",
|
| 138 |
+
"user_id": "lewtun",
|
| 139 |
+
"session_start_time": "2026-01-01T00:00:00",
|
| 140 |
+
"session_end_time": "2026-01-01T00:00:03",
|
| 141 |
+
"model_name": "anthropic/claude-opus-4-6",
|
| 142 |
+
"total_cost_usd": 0.01,
|
| 143 |
+
"messages": [{"role": "user", "content": f"token {HF_SECRET}"}],
|
| 144 |
+
"events": [{"type": "debug", "content": f"key {ANTHROPIC_SECRET}"}],
|
| 145 |
+
"tools": [{"name": "bash", "env": f"GITHUB_TOKEN={GITHUB_SECRET}"}],
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
_write_row_payload(data, str(tmp_file))
|
| 149 |
+
|
| 150 |
+
payload = tmp_file.read_text()
|
| 151 |
+
assert HF_SECRET not in payload
|
| 152 |
+
assert ANTHROPIC_SECRET not in payload
|
| 153 |
+
assert GITHUB_SECRET not in payload
|
| 154 |
+
assert "[REDACTED_HF_TOKEN]" in payload
|
| 155 |
+
assert "[REDACTED_ANTHROPIC_KEY]" in payload
|
| 156 |
+
assert "GITHUB_TOKEN=[REDACTED]" in payload
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def test_claude_code_payload_scrubs_messages_before_conversion(tmp_path):
|
| 160 |
+
tmp_file = tmp_path / "claude_code.jsonl"
|
| 161 |
+
data = {
|
| 162 |
+
"session_id": "session-123",
|
| 163 |
+
"model_name": "anthropic/claude-opus-4-6",
|
| 164 |
+
"session_start_time": "2026-01-01T00:00:00",
|
| 165 |
+
"messages": [
|
| 166 |
+
{
|
| 167 |
+
"role": "user",
|
| 168 |
+
"content": f"token {HF_SECRET}",
|
| 169 |
+
"timestamp": "2026-01-01T00:00:01",
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"role": "assistant",
|
| 173 |
+
"content": "running tool",
|
| 174 |
+
"tool_calls": [
|
| 175 |
+
{
|
| 176 |
+
"id": "call-1",
|
| 177 |
+
"function": {
|
| 178 |
+
"name": "bash",
|
| 179 |
+
"arguments": json.dumps({"key": ANTHROPIC_SECRET}),
|
| 180 |
+
},
|
| 181 |
+
}
|
| 182 |
+
],
|
| 183 |
+
"timestamp": "2026-01-01T00:00:02",
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"role": "tool",
|
| 187 |
+
"tool_call_id": "call-1",
|
| 188 |
+
"content": f"GITHUB_TOKEN={GITHUB_SECRET}",
|
| 189 |
+
"timestamp": "2026-01-01T00:00:03",
|
| 190 |
+
},
|
| 191 |
+
],
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
_write_claude_code_payload(data, str(tmp_file))
|
| 195 |
+
|
| 196 |
+
payload = tmp_file.read_text()
|
| 197 |
+
assert HF_SECRET not in payload
|
| 198 |
+
assert ANTHROPIC_SECRET not in payload
|
| 199 |
+
assert GITHUB_SECRET not in payload
|
| 200 |
+
assert "[REDACTED_HF_TOKEN]" in payload
|
| 201 |
+
assert "[REDACTED_ANTHROPIC_KEY]" in payload
|
| 202 |
+
assert "GITHUB_TOKEN=[REDACTED]" in payload
|